{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14933296000093332, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014107840401785698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 603.2777709960938, "completions/mean_terminated_length": 553.2979125976562, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 9.333310000058333e-05, "grad_norm": 0.15226596593856812, "learning_rate": 2e-07, "loss": 0.0169, "num_tokens": 80610803.0, "reward": 0.4960327446460724, "reward_std": 0.26209649443626404, "rewards/simpleverify_reward/mean": 0.49603271484375, "rewards/simpleverify_reward/std": 0.4999862313270569, "step": 1 }, { "clip_ratio/high_max": 0.0026855107498704456, "clip_ratio/high_mean": 0.0011256248108111322, "clip_ratio/low_mean": 0.0006116732183727436, "clip_ratio/low_min": 7.207221642602235e-05, "clip_ratio/region_mean": 0.001737298061925685, "epoch": 0.00018666620000116666, "grad_norm": 0.1652979701757431, "learning_rate": 2e-07, "loss": -0.0121, "step": 2 }, { "clip_ratio/high_max": 0.0025302996291429736, "clip_ratio/high_mean": 0.0010502484692551661, "clip_ratio/low_mean": 0.0006030750610079849, "clip_ratio/low_min": 7.160595032473793e-05, "clip_ratio/region_mean": 0.0016533235175302252, "epoch": 0.00027999930000175, "grad_norm": 0.13763025403022766, "learning_rate": 2e-07, "loss": -0.0305, "step": 3 }, { "clip_ratio/high_max": 0.0024031228022067808, "clip_ratio/high_mean": 0.0009716729218780529, "clip_ratio/low_mean": 0.0007264342748385388, "clip_ratio/low_min": 6.412036327674286e-05, "clip_ratio/region_mean": 0.0016981071676127613, "epoch": 0.0003733324000023333, "grad_norm": 0.16416049003601074, "learning_rate": 2e-07, "loss": 0.0393, "step": 4 }, { "clip_ratio/high_max": 0.0023153340771386866, "clip_ratio/high_mean": 0.0010534081775404047, "clip_ratio/low_mean": 0.0006291228546615457, "clip_ratio/low_min": 0.00013712085547012975, "clip_ratio/region_mean": 0.0016825310085550882, "epoch": 0.0004666655000029167, "grad_norm": 0.15439161658287048, "learning_rate": 2e-07, "loss": -0.0196, "step": 5 }, { "clip_ratio/high_max": 0.002705680140934419, "clip_ratio/high_mean": 0.0010525900652282871, "clip_ratio/low_mean": 0.000656371860713989, "clip_ratio/low_min": 3.9551418922201265e-05, "clip_ratio/region_mean": 0.0017089618850150146, "epoch": 0.0005599986000035, "grad_norm": 0.1429436206817627, "learning_rate": 2e-07, "loss": 0.0147, "step": 6 }, { "clip_ratio/high_max": 0.00227601443475578, "clip_ratio/high_mean": 0.0010625544418871868, "clip_ratio/low_mean": 0.0006774849589419318, "clip_ratio/low_min": 8.50881888254662e-05, "clip_ratio/region_mean": 0.001740039384458214, "epoch": 0.0006533317000040833, "grad_norm": 0.14794810116291046, "learning_rate": 2e-07, "loss": 0.0189, "step": 7 }, { "clip_ratio/high_max": 0.002208251135016326, "clip_ratio/high_mean": 0.0010921508619503584, "clip_ratio/low_mean": 0.0006793623051635223, "clip_ratio/low_min": 5.477833929035114e-05, "clip_ratio/region_mean": 0.0017715131471049972, "epoch": 0.0007466648000046666, "grad_norm": 0.16203421354293823, "learning_rate": 2e-07, "loss": -0.0032, "step": 8 }, { "clip_ratio/high_max": 0.0027384060667827725, "clip_ratio/high_mean": 0.0011402233831177, "clip_ratio/low_mean": 0.0005811858845845563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017214092804351822, "epoch": 0.00083999790000525, "grad_norm": 0.13111692667007446, "learning_rate": 2e-07, "loss": -0.0567, "step": 9 }, { "clip_ratio/high_max": 0.0020794088050024584, "clip_ratio/high_mean": 0.0009608974251023028, "clip_ratio/low_mean": 0.0006140016066638054, "clip_ratio/low_min": 8.490507389069535e-05, "clip_ratio/region_mean": 0.001574899062688928, "epoch": 0.0009333310000058334, "grad_norm": 0.1865895688533783, "learning_rate": 2e-07, "loss": 0.0087, "step": 10 }, { "clip_ratio/high_max": 0.00221535810851492, "clip_ratio/high_mean": 0.0010552943858783692, "clip_ratio/low_mean": 0.0007242449482873781, "clip_ratio/low_min": 9.243800741387531e-05, "clip_ratio/region_mean": 0.0017795393578126095, "epoch": 0.0010266641000064166, "grad_norm": 0.14398416876792908, "learning_rate": 2e-07, "loss": 0.0471, "step": 11 }, { "clip_ratio/high_max": 0.002243391500087455, "clip_ratio/high_mean": 0.0010248201979266014, "clip_ratio/low_mean": 0.0007454685382981552, "clip_ratio/low_min": 0.00011981024090346182, "clip_ratio/region_mean": 0.0017702887053019367, "epoch": 0.001119997200007, "grad_norm": 0.14180755615234375, "learning_rate": 2e-07, "loss": 0.0314, "step": 12 }, { "clip_ratio/high_max": 0.002350613405724289, "clip_ratio/high_mean": 0.0009637280236347578, "clip_ratio/low_mean": 0.0005558926386584062, "clip_ratio/low_min": 5.653571406583069e-05, "clip_ratio/region_mean": 0.0015196206295513548, "epoch": 0.0012133303000075833, "grad_norm": 0.13647836446762085, "learning_rate": 2e-07, "loss": -0.0054, "step": 13 }, { "clip_ratio/high_max": 0.002185715718951542, "clip_ratio/high_mean": 0.0009792264754651114, "clip_ratio/low_mean": 0.000533153691321786, "clip_ratio/low_min": 7.11764050720376e-06, "clip_ratio/region_mean": 0.0015123801786103286, "epoch": 0.0013066634000081666, "grad_norm": 0.1443215310573578, "learning_rate": 2e-07, "loss": 0.0061, "step": 14 }, { "clip_ratio/high_max": 0.002351396622543689, "clip_ratio/high_mean": 0.0008825419790809974, "clip_ratio/low_mean": 0.0006741069546478684, "clip_ratio/low_min": 8.389318554691272e-05, "clip_ratio/region_mean": 0.001556648945552297, "epoch": 0.00139999650000875, "grad_norm": 0.12755635380744934, "learning_rate": 2e-07, "loss": 0.0334, "step": 15 }, { "clip_ratio/high_max": 0.002210871047282126, "clip_ratio/high_mean": 0.0009745259776536841, "clip_ratio/low_mean": 0.0006068867678550305, "clip_ratio/low_min": 4.074454409419559e-05, "clip_ratio/region_mean": 0.0015814127618796192, "epoch": 0.0014933296000093333, "grad_norm": 0.1446833461523056, "learning_rate": 2e-07, "loss": -0.015, "step": 16 }, { "clip_ratio/high_max": 0.0026980257243849337, "clip_ratio/high_mean": 0.0010633438287186436, "clip_ratio/low_mean": 0.0006677285546174971, "clip_ratio/low_min": 6.359473536576843e-05, "clip_ratio/region_mean": 0.0017310723778791726, "epoch": 0.0015866627000099165, "grad_norm": 0.14644832909107208, "learning_rate": 2e-07, "loss": 0.024, "step": 17 }, { "clip_ratio/high_max": 0.0018859548945329152, "clip_ratio/high_mean": 0.0009480064527451759, "clip_ratio/low_mean": 0.0006601967706956202, "clip_ratio/low_min": 9.549986953061307e-05, "clip_ratio/region_mean": 0.0016082032525446266, "epoch": 0.0016799958000105, "grad_norm": 0.16266238689422607, "learning_rate": 2e-07, "loss": 0.034, "step": 18 }, { "clip_ratio/high_max": 0.0025572568993084133, "clip_ratio/high_mean": 0.0010283998526574578, "clip_ratio/low_mean": 0.0005791197681901394, "clip_ratio/low_min": 4.354961674835067e-05, "clip_ratio/region_mean": 0.0016075196363090072, "epoch": 0.0017733289000110833, "grad_norm": 0.14081984758377075, "learning_rate": 2e-07, "loss": -0.0026, "step": 19 }, { "clip_ratio/high_max": 0.0023484854900743812, "clip_ratio/high_mean": 0.001032120424497407, "clip_ratio/low_mean": 0.0006547435968968784, "clip_ratio/low_min": 5.282235906634014e-05, "clip_ratio/region_mean": 0.0016868639941094443, "epoch": 0.0018666620000116667, "grad_norm": 0.15375037491321564, "learning_rate": 2e-07, "loss": 0.0094, "step": 20 }, { "clip_ratio/high_max": 0.0022490218543680385, "clip_ratio/high_mean": 0.000980297973001143, "clip_ratio/low_mean": 0.0006408726767404005, "clip_ratio/low_min": 3.1734762160340324e-05, "clip_ratio/region_mean": 0.0016211706679314375, "epoch": 0.00195999510001225, "grad_norm": 0.13868862390518188, "learning_rate": 2e-07, "loss": 0.0058, "step": 21 }, { "clip_ratio/high_max": 0.002334155586140696, "clip_ratio/high_mean": 0.001086789890905493, "clip_ratio/low_mean": 0.00071046191987989, "clip_ratio/low_min": 2.1609082068607677e-05, "clip_ratio/region_mean": 0.001797251810785383, "epoch": 0.002053328200012833, "grad_norm": 0.13397537171840668, "learning_rate": 2e-07, "loss": -0.0264, "step": 22 }, { "clip_ratio/high_max": 0.0025217489528586157, "clip_ratio/high_mean": 0.0010892995578615228, "clip_ratio/low_mean": 0.0007800756138749421, "clip_ratio/low_min": 0.00014082737106946297, "clip_ratio/region_mean": 0.0018693751990213059, "epoch": 0.0021466613000134167, "grad_norm": 0.16504544019699097, "learning_rate": 2e-07, "loss": 0.0375, "step": 23 }, { "clip_ratio/high_max": 0.002318485625437461, "clip_ratio/high_mean": 0.0010083773577207467, "clip_ratio/low_mean": 0.0006978539258852834, "clip_ratio/low_min": 0.0001557414971102844, "clip_ratio/region_mean": 0.0017062312763300724, "epoch": 0.002239994400014, "grad_norm": 0.14871977269649506, "learning_rate": 2e-07, "loss": 0.0237, "step": 24 }, { "clip_ratio/high_max": 0.0019595956327975728, "clip_ratio/high_mean": 0.0008987899727799231, "clip_ratio/low_mean": 0.0007496341422665864, "clip_ratio/low_min": 5.087932458991418e-05, "clip_ratio/region_mean": 0.0016484241568832658, "epoch": 0.002333327500014583, "grad_norm": 0.14867891371250153, "learning_rate": 2e-07, "loss": 0.0181, "step": 25 }, { "clip_ratio/high_max": 0.0021967895445413888, "clip_ratio/high_mean": 0.0010050516320916358, "clip_ratio/low_mean": 0.0006741976321791299, "clip_ratio/low_min": 4.255769363226136e-05, "clip_ratio/region_mean": 0.0016792492679087445, "epoch": 0.0024266606000151666, "grad_norm": 0.1324433982372284, "learning_rate": 2e-07, "loss": 0.016, "step": 26 }, { "clip_ratio/high_max": 0.00221184497786453, "clip_ratio/high_mean": 0.0009845319473242853, "clip_ratio/low_mean": 0.0006456787523347884, "clip_ratio/low_min": 3.370817330505815e-05, "clip_ratio/region_mean": 0.001630210656003328, "epoch": 0.00251999370001575, "grad_norm": 0.2621995806694031, "learning_rate": 2e-07, "loss": 0.0267, "step": 27 }, { "clip_ratio/high_max": 0.0025492762797512114, "clip_ratio/high_mean": 0.0010747178639576305, "clip_ratio/low_mean": 0.0007755294536764268, "clip_ratio/low_min": 0.00011764569444494555, "clip_ratio/region_mean": 0.0018502473467378877, "epoch": 0.002613326800016333, "grad_norm": 0.14637550711631775, "learning_rate": 2e-07, "loss": 0.0296, "step": 28 }, { "clip_ratio/high_max": 0.002191332503571175, "clip_ratio/high_mean": 0.0009977854570024647, "clip_ratio/low_mean": 0.0007603040994581534, "clip_ratio/low_min": 0.0001385093892167788, "clip_ratio/region_mean": 0.001758089529175777, "epoch": 0.0027066599000169166, "grad_norm": 0.16185365617275238, "learning_rate": 2e-07, "loss": 0.04, "step": 29 }, { "clip_ratio/high_max": 0.002307657116034534, "clip_ratio/high_mean": 0.0009033352707774611, "clip_ratio/low_mean": 0.0007147880496631842, "clip_ratio/low_min": 8.216399965021992e-05, "clip_ratio/region_mean": 0.0016181233077077195, "epoch": 0.0027999930000175, "grad_norm": 0.13148848712444305, "learning_rate": 2e-07, "loss": 0.038, "step": 30 }, { "clip_ratio/high_max": 0.0021877511389902793, "clip_ratio/high_mean": 0.0010329049073334318, "clip_ratio/low_mean": 0.000748579832361429, "clip_ratio/low_min": 6.69975906930631e-05, "clip_ratio/region_mean": 0.0017814847378758714, "epoch": 0.002893326100018083, "grad_norm": 0.17129682004451752, "learning_rate": 2e-07, "loss": 0.0117, "step": 31 }, { "clip_ratio/high_max": 0.0022434907587012276, "clip_ratio/high_mean": 0.0009733073347888421, "clip_ratio/low_mean": 0.000713086366886273, "clip_ratio/low_min": 1.114876886276761e-05, "clip_ratio/region_mean": 0.001686393705313094, "epoch": 0.0029866592000186666, "grad_norm": 0.14859792590141296, "learning_rate": 2e-07, "loss": 0.0567, "step": 32 }, { "clip_ratio/high_max": 0.0021690325884264894, "clip_ratio/high_mean": 0.001026012907459517, "clip_ratio/low_mean": 0.0007430598598148208, "clip_ratio/low_min": 5.558419979934115e-05, "clip_ratio/region_mean": 0.0017690727909212, "epoch": 0.00307999230001925, "grad_norm": 0.16069963574409485, "learning_rate": 2e-07, "loss": 0.0226, "step": 33 }, { "clip_ratio/high_max": 0.0023200569703476503, "clip_ratio/high_mean": 0.0009928549370670225, "clip_ratio/low_mean": 0.000743487267754972, "clip_ratio/low_min": 0.00010463964463269804, "clip_ratio/region_mean": 0.001736342215735931, "epoch": 0.003173325400019833, "grad_norm": 0.17957302927970886, "learning_rate": 2e-07, "loss": 0.0358, "step": 34 }, { "clip_ratio/high_max": 0.002232205333712045, "clip_ratio/high_mean": 0.0009406781464349478, "clip_ratio/low_mean": 0.0007508251910621766, "clip_ratio/low_min": 3.246802953071892e-05, "clip_ratio/region_mean": 0.001691503362962976, "epoch": 0.0032666585000204165, "grad_norm": 0.18125486373901367, "learning_rate": 2e-07, "loss": 0.0335, "step": 35 }, { "clip_ratio/high_max": 0.00237008101248648, "clip_ratio/high_mean": 0.0010432500639581122, "clip_ratio/low_mean": 0.0007043413716019131, "clip_ratio/low_min": 4.8047357267932966e-05, "clip_ratio/region_mean": 0.0017475914864917286, "epoch": 0.003359991600021, "grad_norm": 0.15145115554332733, "learning_rate": 2e-07, "loss": 0.0241, "step": 36 }, { "clip_ratio/high_max": 0.002563024976552697, "clip_ratio/high_mean": 0.0010309857952961465, "clip_ratio/low_mean": 0.0007508490234613419, "clip_ratio/low_min": 0.00011208651358174393, "clip_ratio/region_mean": 0.0017818348496803083, "epoch": 0.0034533247000215835, "grad_norm": 0.26259833574295044, "learning_rate": 2e-07, "loss": 0.041, "step": 37 }, { "clip_ratio/high_max": 0.002667009917786345, "clip_ratio/high_mean": 0.0011350483546266332, "clip_ratio/low_mean": 0.0006800539285904961, "clip_ratio/low_min": 4.993234142602887e-05, "clip_ratio/region_mean": 0.0018151022668462247, "epoch": 0.0035466578000221665, "grad_norm": 0.1589832454919815, "learning_rate": 2e-07, "loss": -0.0055, "step": 38 }, { "clip_ratio/high_max": 0.0022534245072165504, "clip_ratio/high_mean": 0.0010167706041102065, "clip_ratio/low_mean": 0.000752102965634549, "clip_ratio/low_min": 0.00010989894872182049, "clip_ratio/region_mean": 0.0017688735752017237, "epoch": 0.00363999090002275, "grad_norm": 0.16235077381134033, "learning_rate": 2e-07, "loss": 0.0104, "step": 39 }, { "clip_ratio/high_max": 0.0021170013278606348, "clip_ratio/high_mean": 0.0009705373049655464, "clip_ratio/low_mean": 0.0007387842024400015, "clip_ratio/low_min": 8.674604123370955e-05, "clip_ratio/region_mean": 0.001709321528323926, "epoch": 0.0037333240000233334, "grad_norm": 0.1307630091905594, "learning_rate": 2e-07, "loss": 0.0193, "step": 40 }, { "clip_ratio/high_max": 0.0028063174031558447, "clip_ratio/high_mean": 0.0011612276648520492, "clip_ratio/low_mean": 0.0007333334087888943, "clip_ratio/low_min": 0.00012489808705140604, "clip_ratio/region_mean": 0.001894561035442166, "epoch": 0.0038266571000239165, "grad_norm": 0.1325671523809433, "learning_rate": 2e-07, "loss": -0.0054, "step": 41 }, { "clip_ratio/high_max": 0.0023343516513705254, "clip_ratio/high_mean": 0.0009843094485404436, "clip_ratio/low_mean": 0.0009499793850409333, "clip_ratio/low_min": 8.664441884320695e-05, "clip_ratio/region_mean": 0.0019342888408573344, "epoch": 0.0039199902000245, "grad_norm": 0.14978283643722534, "learning_rate": 2e-07, "loss": 0.0338, "step": 42 }, { "clip_ratio/high_max": 0.0026751063196570612, "clip_ratio/high_mean": 0.0012017712615488563, "clip_ratio/low_mean": 0.0008109274294838542, "clip_ratio/low_min": 0.00011104511941084638, "clip_ratio/region_mean": 0.0020126986855757423, "epoch": 0.004013323300025083, "grad_norm": 0.1430795043706894, "learning_rate": 2e-07, "loss": -0.0006, "step": 43 }, { "clip_ratio/high_max": 0.002226410506409593, "clip_ratio/high_mean": 0.001006771088214009, "clip_ratio/low_mean": 0.0008320151646330487, "clip_ratio/low_min": 0.00018646756507223472, "clip_ratio/region_mean": 0.0018387862655799836, "epoch": 0.004106656400025666, "grad_norm": 0.13031642138957977, "learning_rate": 2e-07, "loss": 0.0042, "step": 44 }, { "clip_ratio/high_max": 0.002572059296653606, "clip_ratio/high_mean": 0.001111016956201638, "clip_ratio/low_mean": 0.0009562791892676614, "clip_ratio/low_min": 5.3831755849387264e-05, "clip_ratio/region_mean": 0.002067296125460416, "epoch": 0.00419998950002625, "grad_norm": 0.14828446507453918, "learning_rate": 2e-07, "loss": 0.0178, "step": 45 }, { "clip_ratio/high_max": 0.0025222725889761932, "clip_ratio/high_mean": 0.0010423919484310318, "clip_ratio/low_mean": 0.0007909676660347031, "clip_ratio/low_min": 9.384907298226608e-05, "clip_ratio/region_mean": 0.0018333595871808939, "epoch": 0.004293322600026833, "grad_norm": 0.15505465865135193, "learning_rate": 2e-07, "loss": 0.0439, "step": 46 }, { "clip_ratio/high_max": 0.0021047551636002026, "clip_ratio/high_mean": 0.0009077926306417794, "clip_ratio/low_mean": 0.0007585885159642203, "clip_ratio/low_min": 7.39135175535921e-05, "clip_ratio/region_mean": 0.0016663811438775156, "epoch": 0.004386655700027417, "grad_norm": 0.16200260818004608, "learning_rate": 2e-07, "loss": 0.0603, "step": 47 }, { "clip_ratio/high_max": 0.00229677894822089, "clip_ratio/high_mean": 0.001016259386233287, "clip_ratio/low_mean": 0.0008135440075420775, "clip_ratio/low_min": 6.104169733589515e-05, "clip_ratio/region_mean": 0.0018298033683095127, "epoch": 0.004479988800028, "grad_norm": 0.15192489326000214, "learning_rate": 2e-07, "loss": 0.0274, "step": 48 }, { "clip_ratio/high_max": 0.0027774704940384254, "clip_ratio/high_mean": 0.0010080403280881, "clip_ratio/low_mean": 0.0008196654034691164, "clip_ratio/low_min": 9.272444185626227e-05, "clip_ratio/region_mean": 0.0018277057315572165, "epoch": 0.004573321900028583, "grad_norm": 0.17384760081768036, "learning_rate": 2e-07, "loss": 0.0526, "step": 49 }, { "clip_ratio/high_max": 0.002093526069074869, "clip_ratio/high_mean": 0.0009535997196508106, "clip_ratio/low_mean": 0.0007246116292662919, "clip_ratio/low_min": 0.00012367929457468563, "clip_ratio/region_mean": 0.0016782113889348693, "epoch": 0.004666655000029166, "grad_norm": 0.4138626456260681, "learning_rate": 2e-07, "loss": 0.0422, "step": 50 }, { "clip_ratio/high_max": 0.0021515874068427365, "clip_ratio/high_mean": 0.0009167434800474439, "clip_ratio/low_mean": 0.0008631627897557337, "clip_ratio/low_min": 0.00015628979235771112, "clip_ratio/region_mean": 0.0017799062552512623, "epoch": 0.00475998810002975, "grad_norm": 0.1734093278646469, "learning_rate": 2e-07, "loss": 0.0585, "step": 51 }, { "clip_ratio/high_max": 0.0027632777564576827, "clip_ratio/high_mean": 0.0011283850835752673, "clip_ratio/low_mean": 0.000805856216175016, "clip_ratio/low_min": 0.00010226385984424269, "clip_ratio/region_mean": 0.0019342413506819867, "epoch": 0.004853321200030333, "grad_norm": 0.19594521820545197, "learning_rate": 2e-07, "loss": 0.016, "step": 52 }, { "clip_ratio/high_max": 0.0022071929124649614, "clip_ratio/high_mean": 0.0010580600246612448, "clip_ratio/low_mean": 0.0008659018130856566, "clip_ratio/low_min": 6.15788230788894e-05, "clip_ratio/region_mean": 0.0019239617977291346, "epoch": 0.004946654300030917, "grad_norm": 0.17863456904888153, "learning_rate": 2e-07, "loss": 0.0264, "step": 53 }, { "clip_ratio/high_max": 0.0024368626982322894, "clip_ratio/high_mean": 0.0011928184467251413, "clip_ratio/low_mean": 0.000822635385702597, "clip_ratio/low_min": 0.00013990535080665722, "clip_ratio/region_mean": 0.0020154538433416747, "epoch": 0.0050399874000315, "grad_norm": 0.19694636762142181, "learning_rate": 2e-07, "loss": 0.0011, "step": 54 }, { "clip_ratio/high_max": 0.002929450958617963, "clip_ratio/high_mean": 0.0012396831334626768, "clip_ratio/low_mean": 0.0007807874735590303, "clip_ratio/low_min": 0.00013336174606592976, "clip_ratio/region_mean": 0.0020204705942887813, "epoch": 0.005133320500032083, "grad_norm": 0.19725419580936432, "learning_rate": 2e-07, "loss": 0.0115, "step": 55 }, { "clip_ratio/high_max": 0.0028225080750416964, "clip_ratio/high_mean": 0.0012325172683631536, "clip_ratio/low_mean": 0.0008001118712854804, "clip_ratio/low_min": 7.073291999404319e-05, "clip_ratio/region_mean": 0.0020326290978118777, "epoch": 0.005226653600032666, "grad_norm": 0.1624283641576767, "learning_rate": 2e-07, "loss": -0.0038, "step": 56 }, { "clip_ratio/high_max": 0.002502915078366641, "clip_ratio/high_mean": 0.0010232139175059274, "clip_ratio/low_mean": 0.000877360380400205, "clip_ratio/low_min": 9.261917966796318e-05, "clip_ratio/region_mean": 0.0019005742578883655, "epoch": 0.00531998670003325, "grad_norm": 0.44605982303619385, "learning_rate": 2e-07, "loss": 0.0394, "step": 57 }, { "clip_ratio/high_max": 0.0020510404065134935, "clip_ratio/high_mean": 0.0009520262228761567, "clip_ratio/low_mean": 0.0009041506746143568, "clip_ratio/low_min": 5.323639379639644e-05, "clip_ratio/region_mean": 0.0018561769029474817, "epoch": 0.005413319800033833, "grad_norm": 0.19620570540428162, "learning_rate": 2e-07, "loss": 0.0312, "step": 58 }, { "clip_ratio/high_max": 0.0022402981703635305, "clip_ratio/high_mean": 0.0010909843804256525, "clip_ratio/low_mean": 0.0009064340847544372, "clip_ratio/low_min": 9.927446353685809e-05, "clip_ratio/region_mean": 0.0019974184833699837, "epoch": 0.005506652900034417, "grad_norm": 0.16989979147911072, "learning_rate": 2e-07, "loss": 0.0298, "step": 59 }, { "clip_ratio/high_max": 0.0024603563506389037, "clip_ratio/high_mean": 0.001084091894881567, "clip_ratio/low_mean": 0.0008537646262993803, "clip_ratio/low_min": 7.753918544040062e-05, "clip_ratio/region_mean": 0.001937856519361958, "epoch": 0.005599986000035, "grad_norm": 0.1661187708377838, "learning_rate": 2e-07, "loss": 0.0143, "step": 60 }, { "clip_ratio/high_max": 0.0027738240751205012, "clip_ratio/high_mean": 0.0011560479215404484, "clip_ratio/low_mean": 0.0008643481487524696, "clip_ratio/low_min": 0.00015047538181534037, "clip_ratio/region_mean": 0.0020203961175866425, "epoch": 0.005693319100035584, "grad_norm": 0.15792235732078552, "learning_rate": 2e-07, "loss": 0.0124, "step": 61 }, { "clip_ratio/high_max": 0.0023871076045907103, "clip_ratio/high_mean": 0.0011370957072358578, "clip_ratio/low_mean": 0.0007909991363703739, "clip_ratio/low_min": 5.895521371712675e-05, "clip_ratio/region_mean": 0.001928094839968253, "epoch": 0.005786652200036166, "grad_norm": 0.1416006088256836, "learning_rate": 2e-07, "loss": -0.022, "step": 62 }, { "clip_ratio/high_max": 0.0024896654213080183, "clip_ratio/high_mean": 0.0010819306517078076, "clip_ratio/low_mean": 0.0008097823738353327, "clip_ratio/low_min": 0.00011734104282368207, "clip_ratio/region_mean": 0.0018917130655609071, "epoch": 0.00587998530003675, "grad_norm": 0.1736774444580078, "learning_rate": 2e-07, "loss": 0.028, "step": 63 }, { "clip_ratio/high_max": 0.002121022858773358, "clip_ratio/high_mean": 0.0010597049058560515, "clip_ratio/low_mean": 0.0008689436963322805, "clip_ratio/low_min": 0.00020803285497095203, "clip_ratio/region_mean": 0.00192864854761865, "epoch": 0.005973318400037333, "grad_norm": 0.22265668213367462, "learning_rate": 2e-07, "loss": 0.0379, "step": 64 }, { "clip_ratio/high_max": 0.0023637892700207885, "clip_ratio/high_mean": 0.0009820849154493771, "clip_ratio/low_mean": 0.0009203073786920868, "clip_ratio/low_min": 0.00013381684539126582, "clip_ratio/region_mean": 0.0019023923159693368, "epoch": 0.006066651500037917, "grad_norm": 0.2035197913646698, "learning_rate": 2e-07, "loss": 0.0696, "step": 65 }, { "clip_ratio/high_max": 0.002492961139068939, "clip_ratio/high_mean": 0.0012293810032133479, "clip_ratio/low_mean": 0.0007596666455356171, "clip_ratio/low_min": 0.0001599594024810358, "clip_ratio/region_mean": 0.0019890476323780604, "epoch": 0.0061599846000385, "grad_norm": 0.17650087177753448, "learning_rate": 2e-07, "loss": 0.0158, "step": 66 }, { "clip_ratio/high_max": 0.0028149780118837953, "clip_ratio/high_mean": 0.0010748904351203237, "clip_ratio/low_mean": 0.000826733357826015, "clip_ratio/low_min": 0.00010900974393734941, "clip_ratio/region_mean": 0.0019016238293261267, "epoch": 0.0062533177000390835, "grad_norm": 0.2179858386516571, "learning_rate": 2e-07, "loss": 0.0391, "step": 67 }, { "clip_ratio/high_max": 0.0023134970833780244, "clip_ratio/high_mean": 0.0010798194380186033, "clip_ratio/low_mean": 0.0008778970168350497, "clip_ratio/low_min": 5.8156663726549596e-05, "clip_ratio/region_mean": 0.001957716500328388, "epoch": 0.006346650800039666, "grad_norm": 0.2374267876148224, "learning_rate": 2e-07, "loss": 0.0024, "step": 68 }, { "clip_ratio/high_max": 0.0027998356890748255, "clip_ratio/high_mean": 0.0012960523454239592, "clip_ratio/low_mean": 0.0009323103022325085, "clip_ratio/low_min": 9.667921585787553e-05, "clip_ratio/region_mean": 0.0022283626240096055, "epoch": 0.00643998390004025, "grad_norm": 0.1723237782716751, "learning_rate": 2e-07, "loss": -0.0295, "step": 69 }, { "clip_ratio/high_max": 0.0027361434331396595, "clip_ratio/high_mean": 0.0012135108772781678, "clip_ratio/low_mean": 0.0008462146015517646, "clip_ratio/low_min": 1.4838556126051117e-05, "clip_ratio/region_mean": 0.00205972545518307, "epoch": 0.006533317000040833, "grad_norm": 0.2730659544467926, "learning_rate": 2e-07, "loss": 0.0066, "step": 70 }, { "clip_ratio/high_max": 0.002428738422167953, "clip_ratio/high_mean": 0.0011149525598739274, "clip_ratio/low_mean": 0.0009069315674423706, "clip_ratio/low_min": 0.00010064037815027405, "clip_ratio/region_mean": 0.0020218841309542768, "epoch": 0.0066266501000414165, "grad_norm": 0.18444810807704926, "learning_rate": 2e-07, "loss": -0.0091, "step": 71 }, { "clip_ratio/high_max": 0.0028033852941007353, "clip_ratio/high_mean": 0.00131632607372012, "clip_ratio/low_mean": 0.0009276634955313057, "clip_ratio/low_min": 2.9113659365975764e-05, "clip_ratio/region_mean": 0.0022439895474235527, "epoch": 0.006719983200042, "grad_norm": 0.1775394082069397, "learning_rate": 2e-07, "loss": -0.0155, "step": 72 }, { "clip_ratio/high_max": 0.0024941975352703594, "clip_ratio/high_mean": 0.001106993600842543, "clip_ratio/low_mean": 0.000981330216745846, "clip_ratio/low_min": 0.00012341036017460283, "clip_ratio/region_mean": 0.0020883238248643465, "epoch": 0.0068133163000425835, "grad_norm": 0.17234064638614655, "learning_rate": 2e-07, "loss": 0.0579, "step": 73 }, { "clip_ratio/high_max": 0.0026957338413922116, "clip_ratio/high_mean": 0.001078817767847795, "clip_ratio/low_mean": 0.0009087666549021378, "clip_ratio/low_min": 0.00010553940501267789, "clip_ratio/region_mean": 0.001987584364542272, "epoch": 0.006906649400043167, "grad_norm": 0.14620539546012878, "learning_rate": 2e-07, "loss": 0.0073, "step": 74 }, { "clip_ratio/high_max": 0.002455393645504955, "clip_ratio/high_mean": 0.001171944120869739, "clip_ratio/low_mean": 0.0012243612691236194, "clip_ratio/low_min": 0.00021858633954252582, "clip_ratio/region_mean": 0.002396305375441443, "epoch": 0.0069999825000437495, "grad_norm": 0.1882891207933426, "learning_rate": 2e-07, "loss": 0.0386, "step": 75 }, { "clip_ratio/high_max": 0.0024321619712281972, "clip_ratio/high_mean": 0.001136313752795104, "clip_ratio/low_mean": 0.0009440828944207169, "clip_ratio/low_min": 0.00016096510080387816, "clip_ratio/region_mean": 0.0020803966108360328, "epoch": 0.007093315600044333, "grad_norm": 0.1752462089061737, "learning_rate": 2e-07, "loss": 0.0489, "step": 76 }, { "clip_ratio/high_max": 0.0024777910803095438, "clip_ratio/high_mean": 0.0011775482307712082, "clip_ratio/low_mean": 0.0011148723133374006, "clip_ratio/low_min": 0.00014076209481572732, "clip_ratio/region_mean": 0.002292420467711054, "epoch": 0.0071866487000449165, "grad_norm": 0.1601407527923584, "learning_rate": 2e-07, "loss": 0.056, "step": 77 }, { "clip_ratio/high_max": 0.00286518858774798, "clip_ratio/high_mean": 0.0011782766887336038, "clip_ratio/low_mean": 0.0010658108549250755, "clip_ratio/low_min": 0.00020794519969058456, "clip_ratio/region_mean": 0.0022440875836764462, "epoch": 0.0072799818000455, "grad_norm": 0.1464841067790985, "learning_rate": 2e-07, "loss": -0.0047, "step": 78 }, { "clip_ratio/high_max": 0.0026522570697125047, "clip_ratio/high_mean": 0.0011561137689568568, "clip_ratio/low_mean": 0.0011050398024963215, "clip_ratio/low_min": 0.0001377565286020399, "clip_ratio/region_mean": 0.002261153560539242, "epoch": 0.007373314900046083, "grad_norm": 0.16872893273830414, "learning_rate": 2e-07, "loss": 0.0167, "step": 79 }, { "clip_ratio/high_max": 0.0020787146131624468, "clip_ratio/high_mean": 0.0010273715779476333, "clip_ratio/low_mean": 0.001056640008755494, "clip_ratio/low_min": 0.00013105883408570662, "clip_ratio/region_mean": 0.0020840116194449365, "epoch": 0.007466648000046667, "grad_norm": 0.1522258073091507, "learning_rate": 2e-07, "loss": 0.0243, "step": 80 }, { "clip_ratio/high_max": 0.0025840453890850767, "clip_ratio/high_mean": 0.0011914781789528206, "clip_ratio/low_mean": 0.00096780594685697, "clip_ratio/low_min": 8.13832211861154e-05, "clip_ratio/region_mean": 0.0021592840930679813, "epoch": 0.0075599811000472495, "grad_norm": 0.1554807871580124, "learning_rate": 2e-07, "loss": -0.0174, "step": 81 }, { "clip_ratio/high_max": 0.0025976478500524536, "clip_ratio/high_mean": 0.0011098824033979326, "clip_ratio/low_mean": 0.0010079027779283933, "clip_ratio/low_min": 0.00018711211305344477, "clip_ratio/region_mean": 0.002117785144946538, "epoch": 0.007653314200047833, "grad_norm": 0.16818466782569885, "learning_rate": 2e-07, "loss": 0.0013, "step": 82 }, { "clip_ratio/high_max": 0.002760782968834974, "clip_ratio/high_mean": 0.0012953164550708607, "clip_ratio/low_mean": 0.0010259411537845153, "clip_ratio/low_min": 0.00020376176507852506, "clip_ratio/region_mean": 0.0023212575761135668, "epoch": 0.007746647300048416, "grad_norm": 0.15922905504703522, "learning_rate": 2e-07, "loss": 0.0185, "step": 83 }, { "clip_ratio/high_max": 0.0022610355663346127, "clip_ratio/high_mean": 0.0010640630425768904, "clip_ratio/low_mean": 0.0011177770211361349, "clip_ratio/low_min": 0.00017762661718734307, "clip_ratio/region_mean": 0.002181840070988983, "epoch": 0.007839980400049, "grad_norm": 0.2317257821559906, "learning_rate": 2e-07, "loss": 0.0707, "step": 84 }, { "clip_ratio/high_max": 0.003170649491949007, "clip_ratio/high_mean": 0.0012081483073416166, "clip_ratio/low_mean": 0.0009731504978844896, "clip_ratio/low_min": 6.705739633616759e-05, "clip_ratio/region_mean": 0.002181298768846318, "epoch": 0.007933313500049582, "grad_norm": 0.1538148671388626, "learning_rate": 2e-07, "loss": -0.0345, "step": 85 }, { "clip_ratio/high_max": 0.00311738379241433, "clip_ratio/high_mean": 0.0014117482569417916, "clip_ratio/low_mean": 0.0009917117859004065, "clip_ratio/low_min": 0.00012142241030232981, "clip_ratio/region_mean": 0.0024034599846345372, "epoch": 0.008026646600050166, "grad_norm": 0.15156802535057068, "learning_rate": 2e-07, "loss": -0.0178, "step": 86 }, { "clip_ratio/high_max": 0.0026232885938952677, "clip_ratio/high_mean": 0.0012325879106356297, "clip_ratio/low_mean": 0.0010174159961024998, "clip_ratio/low_min": 8.909288317227038e-05, "clip_ratio/region_mean": 0.0022500038976431824, "epoch": 0.00811997970005075, "grad_norm": 0.15562191605567932, "learning_rate": 2e-07, "loss": -0.0129, "step": 87 }, { "clip_ratio/high_max": 0.0027344549307599664, "clip_ratio/high_mean": 0.0011531643067428377, "clip_ratio/low_mean": 0.001031320858601248, "clip_ratio/low_min": 0.00019616128975030733, "clip_ratio/region_mean": 0.002184485136240255, "epoch": 0.008213312800051333, "grad_norm": 0.1790563017129898, "learning_rate": 2e-07, "loss": 0.0112, "step": 88 }, { "clip_ratio/high_max": 0.0028811536758439615, "clip_ratio/high_mean": 0.0011734828731277958, "clip_ratio/low_mean": 0.0010850944672711194, "clip_ratio/low_min": 0.00019698643245646963, "clip_ratio/region_mean": 0.0022585772967431694, "epoch": 0.008306645900051916, "grad_norm": 0.1686941534280777, "learning_rate": 2e-07, "loss": 0.0511, "step": 89 }, { "clip_ratio/high_max": 0.002771753519482445, "clip_ratio/high_mean": 0.0011782679739553714, "clip_ratio/low_mean": 0.0010731514885264914, "clip_ratio/low_min": 0.000161624574502639, "clip_ratio/region_mean": 0.0022514195006806403, "epoch": 0.0083999790000525, "grad_norm": 0.1549367606639862, "learning_rate": 2e-07, "loss": 0.0436, "step": 90 }, { "clip_ratio/high_max": 0.0026062028628075495, "clip_ratio/high_mean": 0.0012258525712240953, "clip_ratio/low_mean": 0.0011274315911578014, "clip_ratio/low_min": 0.00011721917962859152, "clip_ratio/region_mean": 0.0023532842023996636, "epoch": 0.008493312100053083, "grad_norm": 0.23962625861167908, "learning_rate": 2e-07, "loss": 0.024, "step": 91 }, { "clip_ratio/high_max": 0.0024852457499946468, "clip_ratio/high_mean": 0.001242582053237129, "clip_ratio/low_mean": 0.0010324958821001928, "clip_ratio/low_min": 9.696555480331881e-05, "clip_ratio/region_mean": 0.0022750778443878517, "epoch": 0.008586645200053667, "grad_norm": 0.15057489275932312, "learning_rate": 2e-07, "loss": 0.0038, "step": 92 }, { "clip_ratio/high_max": 0.0029352035126066767, "clip_ratio/high_mean": 0.001217487242684001, "clip_ratio/low_mean": 0.001007187383947894, "clip_ratio/low_min": 4.805749995284714e-05, "clip_ratio/region_mean": 0.0022246746957534924, "epoch": 0.00867997830005425, "grad_norm": 0.1444324553012848, "learning_rate": 2e-07, "loss": 0.0091, "step": 93 }, { "clip_ratio/high_max": 0.0031350492281490006, "clip_ratio/high_mean": 0.001457300822949037, "clip_ratio/low_mean": 0.0008478179879602976, "clip_ratio/low_min": 4.969928068021545e-05, "clip_ratio/region_mean": 0.0023051187745295465, "epoch": 0.008773311400054834, "grad_norm": 0.18100248277187347, "learning_rate": 2e-07, "loss": -0.0343, "step": 94 }, { "clip_ratio/high_max": 0.002497330387996044, "clip_ratio/high_mean": 0.0011044251623388845, "clip_ratio/low_mean": 0.0009866000782494666, "clip_ratio/low_min": 6.684048094030004e-05, "clip_ratio/region_mean": 0.0020910252424073406, "epoch": 0.008866644500055417, "grad_norm": 0.1613749861717224, "learning_rate": 2e-07, "loss": 0.0099, "step": 95 }, { "clip_ratio/high_max": 0.0024407687997154426, "clip_ratio/high_mean": 0.0011559821032278705, "clip_ratio/low_mean": 0.0010629022708599223, "clip_ratio/low_min": 0.00016844186484377133, "clip_ratio/region_mean": 0.002218884401372634, "epoch": 0.008959977600056, "grad_norm": 0.2290535867214203, "learning_rate": 2e-07, "loss": 0.0888, "step": 96 }, { "clip_ratio/high_max": 0.002623085805680603, "clip_ratio/high_mean": 0.0011537889222381637, "clip_ratio/low_mean": 0.000994703579635825, "clip_ratio/low_min": 0.00012388513187033823, "clip_ratio/region_mean": 0.0021484925237018615, "epoch": 0.009053310700056582, "grad_norm": 0.14506809413433075, "learning_rate": 2e-07, "loss": 0.0327, "step": 97 }, { "clip_ratio/high_max": 0.0028433686966309324, "clip_ratio/high_mean": 0.0011988236910838168, "clip_ratio/low_mean": 0.0010734581337601412, "clip_ratio/low_min": 0.00014010912127560005, "clip_ratio/region_mean": 0.0022722818393958732, "epoch": 0.009146643800057166, "grad_norm": 0.1923784464597702, "learning_rate": 2e-07, "loss": 0.0362, "step": 98 }, { "clip_ratio/high_max": 0.0027167624139110558, "clip_ratio/high_mean": 0.0011705067881848663, "clip_ratio/low_mean": 0.001112714220653288, "clip_ratio/low_min": 0.0001699766189631191, "clip_ratio/region_mean": 0.002283220979734324, "epoch": 0.00923997690005775, "grad_norm": 0.2344072312116623, "learning_rate": 2e-07, "loss": -0.0199, "step": 99 }, { "clip_ratio/high_max": 0.0027516422705957666, "clip_ratio/high_mean": 0.0013628134729515295, "clip_ratio/low_mean": 0.0011274837270320859, "clip_ratio/low_min": 0.00020209298600093462, "clip_ratio/region_mean": 0.0024902972363634035, "epoch": 0.009333310000058333, "grad_norm": 0.18619844317436218, "learning_rate": 2e-07, "loss": -0.0015, "step": 100 }, { "clip_ratio/high_max": 0.002597353857709095, "clip_ratio/high_mean": 0.0012452992887119763, "clip_ratio/low_mean": 0.001088854076442658, "clip_ratio/low_min": 8.280910878966097e-05, "clip_ratio/region_mean": 0.0023341533160419203, "epoch": 0.009426643100058916, "grad_norm": 0.2183188945055008, "learning_rate": 2e-07, "loss": -0.0352, "step": 101 }, { "clip_ratio/high_max": 0.0025321473585790955, "clip_ratio/high_mean": 0.001090346704586409, "clip_ratio/low_mean": 0.0013514034617401194, "clip_ratio/low_min": 0.000209757589345827, "clip_ratio/region_mean": 0.0024417501699645072, "epoch": 0.0095199762000595, "grad_norm": 0.22511126101016998, "learning_rate": 2e-07, "loss": 0.0642, "step": 102 }, { "clip_ratio/high_max": 0.0028341261204332113, "clip_ratio/high_mean": 0.0013259261286293622, "clip_ratio/low_mean": 0.001099737613913021, "clip_ratio/low_min": 9.952640039045946e-05, "clip_ratio/region_mean": 0.0024256637261714786, "epoch": 0.009613309300060083, "grad_norm": 0.21416018903255463, "learning_rate": 2e-07, "loss": 0.0043, "step": 103 }, { "clip_ratio/high_max": 0.0024637576898385305, "clip_ratio/high_mean": 0.0011640440516202943, "clip_ratio/low_mean": 0.0011428451743995538, "clip_ratio/low_min": 0.00018180822644353611, "clip_ratio/region_mean": 0.00230688918964006, "epoch": 0.009706642400060667, "grad_norm": 0.16819307208061218, "learning_rate": 2e-07, "loss": 0.0199, "step": 104 }, { "clip_ratio/high_max": 0.002184862667490961, "clip_ratio/high_mean": 0.0011315882438793778, "clip_ratio/low_mean": 0.0012017831759294495, "clip_ratio/low_min": 0.00017777476750779897, "clip_ratio/region_mean": 0.0023333714561886154, "epoch": 0.00979997550006125, "grad_norm": 0.17144903540611267, "learning_rate": 2e-07, "loss": 0.041, "step": 105 }, { "clip_ratio/high_max": 0.002373758194153197, "clip_ratio/high_mean": 0.0011477002808533143, "clip_ratio/low_mean": 0.0011392829364922363, "clip_ratio/low_min": 0.0001842488763941219, "clip_ratio/region_mean": 0.002286983173689805, "epoch": 0.009893308600061834, "grad_norm": 0.2242686152458191, "learning_rate": 2e-07, "loss": 0.0315, "step": 106 }, { "clip_ratio/high_max": 0.0027733788228943013, "clip_ratio/high_mean": 0.001306910955463536, "clip_ratio/low_mean": 0.0012386434391373768, "clip_ratio/low_min": 0.00022783445183449658, "clip_ratio/region_mean": 0.002545554409152828, "epoch": 0.009986641700062417, "grad_norm": 0.18035157024860382, "learning_rate": 2e-07, "loss": 0.0333, "step": 107 }, { "clip_ratio/high_max": 0.002564405724115204, "clip_ratio/high_mean": 0.0012918792199343443, "clip_ratio/low_mean": 0.001098064603866078, "clip_ratio/low_min": 0.00011617127438512398, "clip_ratio/region_mean": 0.002389943845628295, "epoch": 0.010079974800063, "grad_norm": 0.17409373819828033, "learning_rate": 2e-07, "loss": -0.0138, "step": 108 }, { "clip_ratio/high_max": 0.0032434069144073874, "clip_ratio/high_mean": 0.0013272350370243657, "clip_ratio/low_mean": 0.0012174488892924273, "clip_ratio/low_min": 0.00018728227405517828, "clip_ratio/region_mean": 0.0025446839063079096, "epoch": 0.010173307900063584, "grad_norm": 0.18501029908657074, "learning_rate": 2e-07, "loss": 0.009, "step": 109 }, { "clip_ratio/high_max": 0.0028590106303454377, "clip_ratio/high_mean": 0.0012671758013311774, "clip_ratio/low_mean": 0.0011612375856202561, "clip_ratio/low_min": 0.00019318097838549875, "clip_ratio/region_mean": 0.002428413434245158, "epoch": 0.010266641000064166, "grad_norm": 0.18641002476215363, "learning_rate": 2e-07, "loss": 0.023, "step": 110 }, { "clip_ratio/high_max": 0.002582947810878977, "clip_ratio/high_mean": 0.0012354759601294063, "clip_ratio/low_mean": 0.0011541565854713554, "clip_ratio/low_min": 0.00017510279212729074, "clip_ratio/region_mean": 0.002389632565609645, "epoch": 0.010359974100064749, "grad_norm": 0.18417418003082275, "learning_rate": 2e-07, "loss": 0.0087, "step": 111 }, { "clip_ratio/high_max": 0.0027545609045773745, "clip_ratio/high_mean": 0.0013644360078615136, "clip_ratio/low_mean": 0.0013211760779086035, "clip_ratio/low_min": 0.00025255313357774867, "clip_ratio/region_mean": 0.0026856120821321383, "epoch": 0.010453307200065333, "grad_norm": 0.22844615578651428, "learning_rate": 2e-07, "loss": 0.0051, "step": 112 }, { "clip_ratio/high_max": 0.0027774928603321314, "clip_ratio/high_mean": 0.0011485788563732058, "clip_ratio/low_mean": 0.0013238698338682298, "clip_ratio/low_min": 0.00020535209114314057, "clip_ratio/region_mean": 0.002472448664775584, "epoch": 0.010546640300065916, "grad_norm": 0.20752160251140594, "learning_rate": 2e-07, "loss": 0.0515, "step": 113 }, { "clip_ratio/high_max": 0.0028086688034818508, "clip_ratio/high_mean": 0.0013008714631723706, "clip_ratio/low_mean": 0.0012286788114579394, "clip_ratio/low_min": 0.00018464175900589908, "clip_ratio/region_mean": 0.0025295503219240345, "epoch": 0.0106399734000665, "grad_norm": 0.20691971480846405, "learning_rate": 2e-07, "loss": -0.007, "step": 114 }, { "clip_ratio/high_max": 0.0027543717005755752, "clip_ratio/high_mean": 0.0012179336099507054, "clip_ratio/low_mean": 0.001310126459429739, "clip_ratio/low_min": 0.00026240361603413476, "clip_ratio/region_mean": 0.0025280600166297518, "epoch": 0.010733306500067083, "grad_norm": 0.2247685343027115, "learning_rate": 2e-07, "loss": 0.0688, "step": 115 }, { "clip_ratio/high_max": 0.002797228764393367, "clip_ratio/high_mean": 0.0014182783925207332, "clip_ratio/low_mean": 0.001260365705093136, "clip_ratio/low_min": 0.00015520531178481178, "clip_ratio/region_mean": 0.0026786440357682295, "epoch": 0.010826639600067666, "grad_norm": 0.2570752501487732, "learning_rate": 2e-07, "loss": -0.0156, "step": 116 }, { "clip_ratio/high_max": 0.0026659176000976004, "clip_ratio/high_mean": 0.0013909117515140679, "clip_ratio/low_mean": 0.001155127742094919, "clip_ratio/low_min": 5.526294989977032e-05, "clip_ratio/region_mean": 0.0025460394681431353, "epoch": 0.01091997270006825, "grad_norm": 0.1940942108631134, "learning_rate": 2e-07, "loss": -0.0116, "step": 117 }, { "clip_ratio/high_max": 0.002885510170017369, "clip_ratio/high_mean": 0.0013137201385688968, "clip_ratio/low_mean": 0.0013080732351227198, "clip_ratio/low_min": 0.00019464237175270682, "clip_ratio/region_mean": 0.0026217933336738497, "epoch": 0.011013305800068833, "grad_norm": 0.18525971472263336, "learning_rate": 2e-07, "loss": 0.018, "step": 118 }, { "clip_ratio/high_max": 0.0029662559754797257, "clip_ratio/high_mean": 0.0013661800585396122, "clip_ratio/low_mean": 0.0011154318531225726, "clip_ratio/low_min": 1.9042744497710373e-05, "clip_ratio/region_mean": 0.0024816118166199885, "epoch": 0.011106638900069417, "grad_norm": 0.1879681497812271, "learning_rate": 2e-07, "loss": 0.0006, "step": 119 }, { "clip_ratio/high_max": 0.002786269164062105, "clip_ratio/high_mean": 0.001217325418110704, "clip_ratio/low_mean": 0.0014205829211277887, "clip_ratio/low_min": 0.00019812230584648205, "clip_ratio/region_mean": 0.002637908371980302, "epoch": 0.01119997200007, "grad_norm": 0.28733617067337036, "learning_rate": 2e-07, "loss": 0.0575, "step": 120 }, { "clip_ratio/high_max": 0.002727212238823995, "clip_ratio/high_mean": 0.0013546798982133623, "clip_ratio/low_mean": 0.0013883120300306473, "clip_ratio/low_min": 0.00026377217727713287, "clip_ratio/region_mean": 0.002742991920968052, "epoch": 0.011293305100070584, "grad_norm": 0.22074785828590393, "learning_rate": 2e-07, "loss": -0.006, "step": 121 }, { "clip_ratio/high_max": 0.0029078946536174044, "clip_ratio/high_mean": 0.0013981953234178945, "clip_ratio/low_mean": 0.0013565840417868458, "clip_ratio/low_min": 0.00022218512822291814, "clip_ratio/region_mean": 0.0027547793652047403, "epoch": 0.011386638200071167, "grad_norm": 0.19470542669296265, "learning_rate": 2e-07, "loss": -0.0086, "step": 122 }, { "clip_ratio/high_max": 0.0028591438531293534, "clip_ratio/high_mean": 0.0013676728012796957, "clip_ratio/low_mean": 0.0012662382287089713, "clip_ratio/low_min": 0.00018946400723507395, "clip_ratio/region_mean": 0.002633911033626646, "epoch": 0.011479971300071749, "grad_norm": 0.19274753332138062, "learning_rate": 2e-07, "loss": 0.0167, "step": 123 }, { "clip_ratio/high_max": 0.0029422046500258148, "clip_ratio/high_mean": 0.0013169669873605017, "clip_ratio/low_mean": 0.0013506649956980255, "clip_ratio/low_min": 0.00023921645424707094, "clip_ratio/region_mean": 0.0026676319976104423, "epoch": 0.011573304400072332, "grad_norm": 0.23007743060588837, "learning_rate": 2e-07, "loss": 0.0198, "step": 124 }, { "clip_ratio/high_max": 0.002652174654940609, "clip_ratio/high_mean": 0.0012871359722339548, "clip_ratio/low_mean": 0.0013283246153150685, "clip_ratio/low_min": 0.000243618713284377, "clip_ratio/region_mean": 0.0026154605802730657, "epoch": 0.011666637500072916, "grad_norm": 0.16703875362873077, "learning_rate": 2e-07, "loss": 0.0011, "step": 125 }, { "clip_ratio/high_max": 0.0029051582096144557, "clip_ratio/high_mean": 0.0012591703780344687, "clip_ratio/low_mean": 0.0014325643896881957, "clip_ratio/low_min": 0.0001781174023562926, "clip_ratio/region_mean": 0.002691734698601067, "epoch": 0.0117599706000735, "grad_norm": 0.22993162274360657, "learning_rate": 2e-07, "loss": 0.0398, "step": 126 }, { "clip_ratio/high_max": 0.0025598183419788256, "clip_ratio/high_mean": 0.0012333964768913575, "clip_ratio/low_mean": 0.0011834727411041968, "clip_ratio/low_min": 0.0001432362869309145, "clip_ratio/region_mean": 0.0024168692616513, "epoch": 0.011853303700074083, "grad_norm": 0.19572004675865173, "learning_rate": 2e-07, "loss": -0.0058, "step": 127 }, { "clip_ratio/high_max": 0.0031218914373312145, "clip_ratio/high_mean": 0.0014380813263414893, "clip_ratio/low_mean": 0.0013279849590617232, "clip_ratio/low_min": 0.00020982310161343776, "clip_ratio/region_mean": 0.0027660662817652337, "epoch": 0.011946636800074666, "grad_norm": 0.21303395926952362, "learning_rate": 2e-07, "loss": -0.0043, "step": 128 }, { "clip_ratio/high_max": 0.001784255731763551, "clip_ratio/high_mean": 0.0007330932057811879, "clip_ratio/low_mean": 0.0006113292001828086, "clip_ratio/low_min": 5.269098801363725e-05, "clip_ratio/region_mean": 0.001344422413239954, "completions/clipped_ratio": 0.014020647321428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 611.9296875, "completions/mean_terminated_length": 562.3861083984375, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.01203996990007525, "grad_norm": 0.12000353634357452, "learning_rate": 2e-07, "loss": 0.0347, "num_tokens": 162255295.0, "reward": 0.557076632976532, "reward_std": 0.2059343457221985, "rewards/simpleverify_reward/mean": 0.5570765733718872, "rewards/simpleverify_reward/std": 0.49673375487327576, "step": 129 }, { "clip_ratio/high_max": 0.002099405366607243, "clip_ratio/high_mean": 0.0008241080176958349, "clip_ratio/low_mean": 0.0005344934438653581, "clip_ratio/low_min": 4.057550813740818e-05, "clip_ratio/region_mean": 0.0013586014865722973, "epoch": 0.012133303000075833, "grad_norm": 0.11841220408678055, "learning_rate": 2e-07, "loss": 0.0357, "step": 130 }, { "clip_ratio/high_max": 0.001901704141346272, "clip_ratio/high_mean": 0.0008093671131064184, "clip_ratio/low_mean": 0.0005851987843925599, "clip_ratio/low_min": 5.283442442305386e-05, "clip_ratio/region_mean": 0.0013945658938609995, "epoch": 0.012226636100076417, "grad_norm": 6.780872344970703, "learning_rate": 2e-07, "loss": 0.0187, "step": 131 }, { "clip_ratio/high_max": 0.0022469941977760755, "clip_ratio/high_mean": 0.0008135619664244587, "clip_ratio/low_mean": 0.0005735903341701487, "clip_ratio/low_min": 4.25398102379404e-05, "clip_ratio/region_mean": 0.0013871522860426921, "epoch": 0.012319969200077, "grad_norm": 0.1178605929017067, "learning_rate": 2e-07, "loss": -0.0102, "step": 132 }, { "clip_ratio/high_max": 0.0020471107054618187, "clip_ratio/high_mean": 0.0008086670477496227, "clip_ratio/low_mean": 0.0004901428465018398, "clip_ratio/low_min": 2.6928429178951774e-05, "clip_ratio/region_mean": 0.0012988099042559043, "epoch": 0.012413302300077584, "grad_norm": 0.11176750063896179, "learning_rate": 2e-07, "loss": -0.0025, "step": 133 }, { "clip_ratio/high_max": 0.0017681449535302818, "clip_ratio/high_mean": 0.0007636647005710984, "clip_ratio/low_mean": 0.0006020760283718118, "clip_ratio/low_min": 1.3985231817059685e-05, "clip_ratio/region_mean": 0.0013657407362188678, "epoch": 0.012506635400078167, "grad_norm": 0.11252443492412567, "learning_rate": 2e-07, "loss": 0.0274, "step": 134 }, { "clip_ratio/high_max": 0.0018361956281296443, "clip_ratio/high_mean": 0.0007644933739356929, "clip_ratio/low_mean": 0.0006281385358306579, "clip_ratio/low_min": 4.9927264626603574e-05, "clip_ratio/region_mean": 0.0013926318824815098, "epoch": 0.01259996850007875, "grad_norm": 0.11987116932868958, "learning_rate": 2e-07, "loss": 0.0246, "step": 135 }, { "clip_ratio/high_max": 0.0019423902776907198, "clip_ratio/high_mean": 0.0008311068831972079, "clip_ratio/low_mean": 0.0005593588757619727, "clip_ratio/low_min": 9.836912431637757e-05, "clip_ratio/region_mean": 0.00139046576077817, "epoch": 0.012693301600079332, "grad_norm": 0.11899188905954361, "learning_rate": 2e-07, "loss": 0.0239, "step": 136 }, { "clip_ratio/high_max": 0.0020093353305128403, "clip_ratio/high_mean": 0.0007919129893707577, "clip_ratio/low_mean": 0.000594694507526583, "clip_ratio/low_min": 5.4855976486578584e-05, "clip_ratio/region_mean": 0.0013866074878023937, "epoch": 0.012786634700079916, "grad_norm": 0.12157698720693588, "learning_rate": 2e-07, "loss": 0.0142, "step": 137 }, { "clip_ratio/high_max": 0.0018474461394362152, "clip_ratio/high_mean": 0.0007798396964062704, "clip_ratio/low_mean": 0.0005679676087311236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013478073306032456, "epoch": 0.0128799678000805, "grad_norm": 0.12331555038690567, "learning_rate": 2e-07, "loss": 0.0101, "step": 138 }, { "clip_ratio/high_max": 0.0017495603024144657, "clip_ratio/high_mean": 0.0007733914480922977, "clip_ratio/low_mean": 0.0005664287837134907, "clip_ratio/low_min": 1.3557483725890052e-05, "clip_ratio/region_mean": 0.0013398202245298307, "epoch": 0.012973300900081083, "grad_norm": 0.11285652220249176, "learning_rate": 2e-07, "loss": 0.0114, "step": 139 }, { "clip_ratio/high_max": 0.002214077645476209, "clip_ratio/high_mean": 0.0008908995805541053, "clip_ratio/low_mean": 0.0005530921607714845, "clip_ratio/low_min": 4.997755695512751e-05, "clip_ratio/region_mean": 0.0014439917504205368, "epoch": 0.013066634000081666, "grad_norm": 0.12912720441818237, "learning_rate": 2e-07, "loss": -0.0256, "step": 140 }, { "clip_ratio/high_max": 0.001933634397573769, "clip_ratio/high_mean": 0.0008633529432700016, "clip_ratio/low_mean": 0.0006306193026830442, "clip_ratio/low_min": 5.216099998506252e-05, "clip_ratio/region_mean": 0.001493972224125173, "epoch": 0.01315996710008225, "grad_norm": 0.12150504440069199, "learning_rate": 2e-07, "loss": 0.0459, "step": 141 }, { "clip_ratio/high_max": 0.002056179382634582, "clip_ratio/high_mean": 0.0008379270657314919, "clip_ratio/low_mean": 0.0005806022827528068, "clip_ratio/low_min": 2.367632168898126e-05, "clip_ratio/region_mean": 0.0014185293439368252, "epoch": 0.013253300200082833, "grad_norm": 0.13066498935222626, "learning_rate": 2e-07, "loss": 0.0426, "step": 142 }, { "clip_ratio/high_max": 0.0017760896917025093, "clip_ratio/high_mean": 0.0007030675151327159, "clip_ratio/low_mean": 0.0008029214277485153, "clip_ratio/low_min": 0.00010373762961535249, "clip_ratio/region_mean": 0.0015059889337862842, "epoch": 0.013346633300083417, "grad_norm": 0.11980723589658737, "learning_rate": 2e-07, "loss": 0.056, "step": 143 }, { "clip_ratio/high_max": 0.0019900166189472657, "clip_ratio/high_mean": 0.0008574035437050043, "clip_ratio/low_mean": 0.000552692679775646, "clip_ratio/low_min": 3.0324592898978153e-05, "clip_ratio/region_mean": 0.001410096236213576, "epoch": 0.013439966400084, "grad_norm": 0.10925982892513275, "learning_rate": 2e-07, "loss": 0.004, "step": 144 }, { "clip_ratio/high_max": 0.0017764489020919427, "clip_ratio/high_mean": 0.000811282296126592, "clip_ratio/low_mean": 0.000644706280581886, "clip_ratio/low_min": 4.615522448148113e-05, "clip_ratio/region_mean": 0.001455988545785658, "epoch": 0.013533299500084583, "grad_norm": 0.14364492893218994, "learning_rate": 2e-07, "loss": 0.0263, "step": 145 }, { "clip_ratio/high_max": 0.0018374718092672992, "clip_ratio/high_mean": 0.0007956195859151194, "clip_ratio/low_mean": 0.0006395709906428237, "clip_ratio/low_min": 6.809621663705911e-05, "clip_ratio/region_mean": 0.0014351905592775438, "epoch": 0.013626632600085167, "grad_norm": 0.12205006182193756, "learning_rate": 2e-07, "loss": 0.033, "step": 146 }, { "clip_ratio/high_max": 0.00192593449173728, "clip_ratio/high_mean": 0.0007942908159748185, "clip_ratio/low_mean": 0.0006335229209071258, "clip_ratio/low_min": 6.25474003754789e-05, "clip_ratio/region_mean": 0.0014278137605288066, "epoch": 0.01371996570008575, "grad_norm": 0.1163218766450882, "learning_rate": 2e-07, "loss": 0.0294, "step": 147 }, { "clip_ratio/high_max": 0.002195060260419268, "clip_ratio/high_mean": 0.0009122201390709961, "clip_ratio/low_mean": 0.0005158637350177742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014280838549893815, "epoch": 0.013813298800086334, "grad_norm": 0.11541525274515152, "learning_rate": 2e-07, "loss": -0.0046, "step": 148 }, { "clip_ratio/high_max": 0.0020206489534757566, "clip_ratio/high_mean": 0.0007737030800853972, "clip_ratio/low_mean": 0.0005801492370665073, "clip_ratio/low_min": 3.292215751571348e-05, "clip_ratio/region_mean": 0.0013538523198803887, "epoch": 0.013906631900086916, "grad_norm": 0.11246857792139053, "learning_rate": 2e-07, "loss": 0.0041, "step": 149 }, { "clip_ratio/high_max": 0.0018048350248136558, "clip_ratio/high_mean": 0.000699517995599308, "clip_ratio/low_mean": 0.0006342549932014663, "clip_ratio/low_min": 2.725708964135265e-05, "clip_ratio/region_mean": 0.0013337729797058273, "epoch": 0.013999965000087499, "grad_norm": 0.11919964849948883, "learning_rate": 2e-07, "loss": 0.004, "step": 150 }, { "clip_ratio/high_max": 0.002169116876757471, "clip_ratio/high_mean": 0.0009271192247979343, "clip_ratio/low_mean": 0.0005849077242601197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001512026967247948, "epoch": 0.014093298100088083, "grad_norm": 0.11215604096651077, "learning_rate": 2e-07, "loss": 0.0042, "step": 151 }, { "clip_ratio/high_max": 0.0017692481487756595, "clip_ratio/high_mean": 0.0007387256428046385, "clip_ratio/low_mean": 0.0005889445837965468, "clip_ratio/low_min": 1.3363267498789355e-05, "clip_ratio/region_mean": 0.0013276701720315032, "epoch": 0.014186631200088666, "grad_norm": 0.12634488940238953, "learning_rate": 2e-07, "loss": 0.0216, "step": 152 }, { "clip_ratio/high_max": 0.0023092168776202016, "clip_ratio/high_mean": 0.0009610258930479176, "clip_ratio/low_mean": 0.0005577738083957229, "clip_ratio/low_min": 1.2768130545737222e-05, "clip_ratio/region_mean": 0.001518799712357577, "epoch": 0.01427996430008925, "grad_norm": 0.12360531091690063, "learning_rate": 2e-07, "loss": -0.0018, "step": 153 }, { "clip_ratio/high_max": 0.002036969286564272, "clip_ratio/high_mean": 0.0007977804089023266, "clip_ratio/low_mean": 0.000683551774272928, "clip_ratio/low_min": 0.00014017854618941783, "clip_ratio/region_mean": 0.001481332212279085, "epoch": 0.014373297400089833, "grad_norm": 0.12436694651842117, "learning_rate": 2e-07, "loss": 0.0487, "step": 154 }, { "clip_ratio/high_max": 0.0018554799644334707, "clip_ratio/high_mean": 0.0007821693252481055, "clip_ratio/low_mean": 0.0006520995912069338, "clip_ratio/low_min": 1.504573901911499e-05, "clip_ratio/region_mean": 0.0014342689501063433, "epoch": 0.014466630500090416, "grad_norm": 0.13415321707725525, "learning_rate": 2e-07, "loss": 0.0595, "step": 155 }, { "clip_ratio/high_max": 0.0019837784639094025, "clip_ratio/high_mean": 0.0007853263778088149, "clip_ratio/low_mean": 0.000601817437200225, "clip_ratio/low_min": 4.322710810811259e-05, "clip_ratio/region_mean": 0.0013871437986381352, "epoch": 0.014559963600091, "grad_norm": 0.10944689810276031, "learning_rate": 2e-07, "loss": 0.0403, "step": 156 }, { "clip_ratio/high_max": 0.0019150325388181955, "clip_ratio/high_mean": 0.0008243473075708607, "clip_ratio/low_mean": 0.0006404053456208203, "clip_ratio/low_min": 5.4258492127701174e-05, "clip_ratio/region_mean": 0.0014647526768385433, "epoch": 0.014653296700091583, "grad_norm": 0.14248211681842804, "learning_rate": 2e-07, "loss": 0.0554, "step": 157 }, { "clip_ratio/high_max": 0.001759488815878285, "clip_ratio/high_mean": 0.0007669052720302716, "clip_ratio/low_mean": 0.000700808735928149, "clip_ratio/low_min": 6.52748140055337e-05, "clip_ratio/region_mean": 0.001467713991587516, "epoch": 0.014746629800092167, "grad_norm": 0.1275532841682434, "learning_rate": 2e-07, "loss": 0.059, "step": 158 }, { "clip_ratio/high_max": 0.0019940488346037455, "clip_ratio/high_mean": 0.0008210186497308314, "clip_ratio/low_mean": 0.0006319907206489006, "clip_ratio/low_min": 7.883350917836651e-05, "clip_ratio/region_mean": 0.0014530093831126578, "epoch": 0.01483996290009275, "grad_norm": 0.11434304714202881, "learning_rate": 2e-07, "loss": 0.0733, "step": 159 }, { "clip_ratio/high_max": 0.00192092436918756, "clip_ratio/high_mean": 0.0008790222673269454, "clip_ratio/low_mean": 0.00056511196817155, "clip_ratio/low_min": 4.336935035098577e-05, "clip_ratio/region_mean": 0.001444134279154241, "epoch": 0.014933296000093334, "grad_norm": 0.11234098672866821, "learning_rate": 2e-07, "loss": 0.034, "step": 160 }, { "clip_ratio/high_max": 0.0023246791679412127, "clip_ratio/high_mean": 0.0008866738226060988, "clip_ratio/low_mean": 0.000663228694065765, "clip_ratio/low_min": 0.00010616056715662125, "clip_ratio/region_mean": 0.001549902546685189, "epoch": 0.015026629100093917, "grad_norm": 0.1266254037618637, "learning_rate": 2e-07, "loss": 0.0478, "step": 161 }, { "clip_ratio/high_max": 0.001965323681361042, "clip_ratio/high_mean": 0.000820959572592983, "clip_ratio/low_mean": 0.0006965995326027041, "clip_ratio/low_min": 6.833672978245886e-05, "clip_ratio/region_mean": 0.0015175591324805282, "epoch": 0.015119962200094499, "grad_norm": 0.12695853412151337, "learning_rate": 2e-07, "loss": 0.0075, "step": 162 }, { "clip_ratio/high_max": 0.0021713865135097876, "clip_ratio/high_mean": 0.0009276664750359487, "clip_ratio/low_mean": 0.0005310947935868171, "clip_ratio/low_min": 6.268439938139636e-05, "clip_ratio/region_mean": 0.0014587612531613559, "epoch": 0.015213295300095082, "grad_norm": 0.12529891729354858, "learning_rate": 2e-07, "loss": 0.0053, "step": 163 }, { "clip_ratio/high_max": 0.0018675526880542748, "clip_ratio/high_mean": 0.0007227962632896379, "clip_ratio/low_mean": 0.0006287226315180305, "clip_ratio/low_min": 2.9932191864645574e-05, "clip_ratio/region_mean": 0.0013515188729797956, "epoch": 0.015306628400095666, "grad_norm": 0.12023717910051346, "learning_rate": 2e-07, "loss": 0.0749, "step": 164 }, { "clip_ratio/high_max": 0.002090888279781211, "clip_ratio/high_mean": 0.0008774506513873348, "clip_ratio/low_mean": 0.0005721439583794563, "clip_ratio/low_min": 2.457126356603112e-05, "clip_ratio/region_mean": 0.0014495946270471904, "epoch": 0.01539996150009625, "grad_norm": 0.11546096205711365, "learning_rate": 2e-07, "loss": 0.0076, "step": 165 }, { "clip_ratio/high_max": 0.0017688236985122785, "clip_ratio/high_mean": 0.0007386200277323951, "clip_ratio/low_mean": 0.0005452568257169332, "clip_ratio/low_min": 4.906442427454749e-05, "clip_ratio/region_mean": 0.0012838768343499396, "epoch": 0.015493294600096833, "grad_norm": 0.12498149275779724, "learning_rate": 2e-07, "loss": 0.0233, "step": 166 }, { "clip_ratio/high_max": 0.0017236603343917523, "clip_ratio/high_mean": 0.0007574530100100674, "clip_ratio/low_mean": 0.0005989382302686863, "clip_ratio/low_min": 7.755311025903211e-05, "clip_ratio/region_mean": 0.0013563912325480487, "epoch": 0.015586627700097416, "grad_norm": 0.12754815816879272, "learning_rate": 2e-07, "loss": 0.0157, "step": 167 }, { "clip_ratio/high_max": 0.002258755936054513, "clip_ratio/high_mean": 0.0008547649904357968, "clip_ratio/low_mean": 0.0006209034127095947, "clip_ratio/low_min": 2.3443568352377042e-05, "clip_ratio/region_mean": 0.0014756683995074127, "epoch": 0.015679960800098, "grad_norm": 0.14414598047733307, "learning_rate": 2e-07, "loss": 0.0245, "step": 168 }, { "clip_ratio/high_max": 0.001955513438588241, "clip_ratio/high_mean": 0.0008623196681583067, "clip_ratio/low_mean": 0.000602567555688438, "clip_ratio/low_min": 3.983179158240091e-05, "clip_ratio/region_mean": 0.0014648872165707871, "epoch": 0.015773293900098583, "grad_norm": 0.12495750188827515, "learning_rate": 2e-07, "loss": 0.0332, "step": 169 }, { "clip_ratio/high_max": 0.00223838776582852, "clip_ratio/high_mean": 0.0009298213062720606, "clip_ratio/low_mean": 0.0005195967887630104, "clip_ratio/low_min": 2.4004822989809327e-05, "clip_ratio/region_mean": 0.0014494181341433432, "epoch": 0.015866627000099165, "grad_norm": 0.11634866893291473, "learning_rate": 2e-07, "loss": -0.0111, "step": 170 }, { "clip_ratio/high_max": 0.0021104725237819366, "clip_ratio/high_mean": 0.0009034130289364839, "clip_ratio/low_mean": 0.0006394947768058046, "clip_ratio/low_min": 2.704045346035855e-05, "clip_ratio/region_mean": 0.0015429078266606666, "epoch": 0.01595996010009975, "grad_norm": 0.12048265337944031, "learning_rate": 2e-07, "loss": -0.0062, "step": 171 }, { "clip_ratio/high_max": 0.0020956102343916427, "clip_ratio/high_mean": 0.0008470688499073731, "clip_ratio/low_mean": 0.0005501982650457649, "clip_ratio/low_min": 1.2948000403412152e-05, "clip_ratio/region_mean": 0.0013972670894872863, "epoch": 0.016053293200100332, "grad_norm": 0.11529058963060379, "learning_rate": 2e-07, "loss": 0.0049, "step": 172 }, { "clip_ratio/high_max": 0.0017215226071130019, "clip_ratio/high_mean": 0.000741100457162247, "clip_ratio/low_mean": 0.0005466114726004889, "clip_ratio/low_min": 8.460390927211847e-05, "clip_ratio/region_mean": 0.0012877119443146512, "epoch": 0.016146626300100917, "grad_norm": 0.19731692969799042, "learning_rate": 2e-07, "loss": -0.0122, "step": 173 }, { "clip_ratio/high_max": 0.0020964827090210747, "clip_ratio/high_mean": 0.0008398496756854001, "clip_ratio/low_mean": 0.000642816225081333, "clip_ratio/low_min": 4.039527357235784e-05, "clip_ratio/region_mean": 0.0014826658516540192, "epoch": 0.0162399594001015, "grad_norm": 2.250884532928467, "learning_rate": 2e-07, "loss": 0.0139, "step": 174 }, { "clip_ratio/high_max": 0.0017920345781021751, "clip_ratio/high_mean": 0.0007806333560438361, "clip_ratio/low_mean": 0.0005344555156625574, "clip_ratio/low_min": 2.5148067834379617e-05, "clip_ratio/region_mean": 0.0013150888698874041, "epoch": 0.016333292500102084, "grad_norm": 0.11930711567401886, "learning_rate": 2e-07, "loss": 0.0729, "step": 175 }, { "clip_ratio/high_max": 0.0021007739487686194, "clip_ratio/high_mean": 0.0008999269703053869, "clip_ratio/low_mean": 0.0006451310036936775, "clip_ratio/low_min": 4.622362757800147e-05, "clip_ratio/region_mean": 0.0015450579521711916, "epoch": 0.016426625600102666, "grad_norm": 0.1278805285692215, "learning_rate": 2e-07, "loss": -0.0563, "step": 176 }, { "clip_ratio/high_max": 0.0018664096351130866, "clip_ratio/high_mean": 0.0008674918408360099, "clip_ratio/low_mean": 0.0006662578452960588, "clip_ratio/low_min": 9.164878065348603e-05, "clip_ratio/region_mean": 0.0015337497497966979, "epoch": 0.01651995870010325, "grad_norm": 0.1221580058336258, "learning_rate": 2e-07, "loss": 0.0413, "step": 177 }, { "clip_ratio/high_max": 0.0024445316157652996, "clip_ratio/high_mean": 0.00103787755506346, "clip_ratio/low_mean": 0.0004956101593052153, "clip_ratio/low_min": 2.184457662224304e-05, "clip_ratio/region_mean": 0.001533487724373117, "epoch": 0.016613291800103833, "grad_norm": 0.1286056935787201, "learning_rate": 2e-07, "loss": -0.01, "step": 178 }, { "clip_ratio/high_max": 0.002251833699119743, "clip_ratio/high_mean": 0.0008355304053111468, "clip_ratio/low_mean": 0.0006643825654464308, "clip_ratio/low_min": 9.426724409422604e-05, "clip_ratio/region_mean": 0.0014999129198258743, "epoch": 0.016706624900104418, "grad_norm": 0.1179148331284523, "learning_rate": 2e-07, "loss": 0.062, "step": 179 }, { "clip_ratio/high_max": 0.0020663910036091693, "clip_ratio/high_mean": 0.0008590448924223892, "clip_ratio/low_mean": 0.0006636037269345252, "clip_ratio/low_min": 4.826983877137536e-05, "clip_ratio/region_mean": 0.0015226486430037767, "epoch": 0.016799958000105, "grad_norm": 0.12851257622241974, "learning_rate": 2e-07, "loss": 0.0184, "step": 180 }, { "clip_ratio/high_max": 0.002138312134775333, "clip_ratio/high_mean": 0.000860536572872661, "clip_ratio/low_mean": 0.0006943785938346991, "clip_ratio/low_min": 9.249441563952132e-05, "clip_ratio/region_mean": 0.0015549151648883708, "epoch": 0.016893291100105585, "grad_norm": 0.12586717307567596, "learning_rate": 2e-07, "loss": 0.0697, "step": 181 }, { "clip_ratio/high_max": 0.001994305133848684, "clip_ratio/high_mean": 0.0008847359622450313, "clip_ratio/low_mean": 0.0006013852180331014, "clip_ratio/low_min": 2.4066374862741213e-05, "clip_ratio/region_mean": 0.0014861211602692492, "epoch": 0.016986624200106167, "grad_norm": 0.12542015314102173, "learning_rate": 2e-07, "loss": -0.0015, "step": 182 }, { "clip_ratio/high_max": 0.0021351301948016044, "clip_ratio/high_mean": 0.0008703667645022506, "clip_ratio/low_mean": 0.0005913864242756972, "clip_ratio/low_min": 2.936852160928538e-05, "clip_ratio/region_mean": 0.0014617531596741173, "epoch": 0.017079957300106748, "grad_norm": 0.12585563957691193, "learning_rate": 2e-07, "loss": 0.0139, "step": 183 }, { "clip_ratio/high_max": 0.001900650306197349, "clip_ratio/high_mean": 0.000849069129799318, "clip_ratio/low_mean": 0.0005743024203184177, "clip_ratio/low_min": 4.715670183941256e-05, "clip_ratio/region_mean": 0.0014233715446607675, "epoch": 0.017173290400107333, "grad_norm": 0.12186791747808456, "learning_rate": 2e-07, "loss": 0.0206, "step": 184 }, { "clip_ratio/high_max": 0.0019776165863731876, "clip_ratio/high_mean": 0.0007868615903134923, "clip_ratio/low_mean": 0.0005875941787962802, "clip_ratio/low_min": 5.011553002987057e-05, "clip_ratio/region_mean": 0.0013744557945756242, "epoch": 0.017266623500107915, "grad_norm": 0.10793115943670273, "learning_rate": 2e-07, "loss": 0.056, "step": 185 }, { "clip_ratio/high_max": 0.0021568940683209803, "clip_ratio/high_mean": 0.0009200328386214096, "clip_ratio/low_mean": 0.0006038145170350617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015238473424687982, "epoch": 0.0173599566001085, "grad_norm": 0.11519882082939148, "learning_rate": 2e-07, "loss": -0.0292, "step": 186 }, { "clip_ratio/high_max": 0.0019966778563684784, "clip_ratio/high_mean": 0.0008219665505748708, "clip_ratio/low_mean": 0.0006578429274668451, "clip_ratio/low_min": 8.048551444517216e-05, "clip_ratio/region_mean": 0.0014798094925936311, "epoch": 0.017453289700109082, "grad_norm": 0.1287655085325241, "learning_rate": 2e-07, "loss": 0.0465, "step": 187 }, { "clip_ratio/high_max": 0.0021091887829243205, "clip_ratio/high_mean": 0.0008488497842336074, "clip_ratio/low_mean": 0.0006047210217730026, "clip_ratio/low_min": 2.9717120924033225e-05, "clip_ratio/region_mean": 0.0014535707741742954, "epoch": 0.017546622800109667, "grad_norm": 0.12241791933774948, "learning_rate": 2e-07, "loss": 0.029, "step": 188 }, { "clip_ratio/high_max": 0.0023509508682764135, "clip_ratio/high_mean": 0.0008480851465719752, "clip_ratio/low_mean": 0.0007244535954669118, "clip_ratio/low_min": 3.68049904864165e-05, "clip_ratio/region_mean": 0.0015725387493148446, "epoch": 0.01763995590011025, "grad_norm": 0.1332457959651947, "learning_rate": 2e-07, "loss": 0.046, "step": 189 }, { "clip_ratio/high_max": 0.0018754965713014826, "clip_ratio/high_mean": 0.0008037635889195371, "clip_ratio/low_mean": 0.0005728645019189571, "clip_ratio/low_min": 3.0046778192627244e-05, "clip_ratio/region_mean": 0.0013766280790150631, "epoch": 0.017733289000110834, "grad_norm": 0.1131281703710556, "learning_rate": 2e-07, "loss": -0.0006, "step": 190 }, { "clip_ratio/high_max": 0.0019312634904053994, "clip_ratio/high_mean": 0.0008665046079840977, "clip_ratio/low_mean": 0.0005508429767360212, "clip_ratio/low_min": 1.4019739865034353e-05, "clip_ratio/region_mean": 0.0014173475792631507, "epoch": 0.017826622100111416, "grad_norm": 0.11831708252429962, "learning_rate": 2e-07, "loss": 0.0125, "step": 191 }, { "clip_ratio/high_max": 0.0016764222746132873, "clip_ratio/high_mean": 0.0007296729600057006, "clip_ratio/low_mean": 0.0006575298739335267, "clip_ratio/low_min": 6.532936185976723e-05, "clip_ratio/region_mean": 0.0013872028393961955, "epoch": 0.017919955200112, "grad_norm": 0.10915565490722656, "learning_rate": 2e-07, "loss": 0.0278, "step": 192 }, { "clip_ratio/high_max": 0.0020736096957989503, "clip_ratio/high_mean": 0.0008751838431635406, "clip_ratio/low_mean": 0.0006219078331923811, "clip_ratio/low_min": 5.614155998046044e-05, "clip_ratio/region_mean": 0.0014970917254686356, "epoch": 0.018013288300112583, "grad_norm": 0.12327812612056732, "learning_rate": 2e-07, "loss": 0.0206, "step": 193 }, { "clip_ratio/high_max": 0.0019329669885337353, "clip_ratio/high_mean": 0.0008085431254585274, "clip_ratio/low_mean": 0.0005968813748040702, "clip_ratio/low_min": 6.745711471012328e-05, "clip_ratio/region_mean": 0.0014054245002625976, "epoch": 0.018106621400113165, "grad_norm": 0.10631323605775833, "learning_rate": 2e-07, "loss": 0.0183, "step": 194 }, { "clip_ratio/high_max": 0.0019297026810818352, "clip_ratio/high_mean": 0.0008301530360768083, "clip_ratio/low_mean": 0.0005512141869985498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013813671976095065, "epoch": 0.01819995450011375, "grad_norm": 0.11766605079174042, "learning_rate": 2e-07, "loss": 0.0104, "step": 195 }, { "clip_ratio/high_max": 0.0018602242234919686, "clip_ratio/high_mean": 0.0007717592852714006, "clip_ratio/low_mean": 0.000565861793802469, "clip_ratio/low_min": 2.5805120458244346e-05, "clip_ratio/region_mean": 0.0013376210663409438, "epoch": 0.01829328760011433, "grad_norm": 0.11108557134866714, "learning_rate": 2e-07, "loss": 0.0552, "step": 196 }, { "clip_ratio/high_max": 0.0018241533398395404, "clip_ratio/high_mean": 0.0007565808900835691, "clip_ratio/low_mean": 0.0005454191395983798, "clip_ratio/low_min": 6.979340923862765e-06, "clip_ratio/region_mean": 0.0013020000369579066, "epoch": 0.018386620700114917, "grad_norm": 0.11559087038040161, "learning_rate": 2e-07, "loss": -0.0046, "step": 197 }, { "clip_ratio/high_max": 0.0016000511604943313, "clip_ratio/high_mean": 0.0007448696287610801, "clip_ratio/low_mean": 0.000572046064917231, "clip_ratio/low_min": 6.896324248373276e-05, "clip_ratio/region_mean": 0.0013169156518415548, "epoch": 0.0184799538001155, "grad_norm": 0.11472797393798828, "learning_rate": 2e-07, "loss": 0.0171, "step": 198 }, { "clip_ratio/high_max": 0.0018787748158501927, "clip_ratio/high_mean": 0.000733202389710641, "clip_ratio/low_mean": 0.0007538214013038669, "clip_ratio/low_min": 4.63871319880127e-05, "clip_ratio/region_mean": 0.001487023735535331, "epoch": 0.018573286900116084, "grad_norm": 0.11530959606170654, "learning_rate": 2e-07, "loss": 0.0682, "step": 199 }, { "clip_ratio/high_max": 0.002174064182327129, "clip_ratio/high_mean": 0.0008117097750073299, "clip_ratio/low_mean": 0.0005790070249531709, "clip_ratio/low_min": 1.6009220416890457e-05, "clip_ratio/region_mean": 0.0013907168176956475, "epoch": 0.018666620000116665, "grad_norm": 0.13620182871818542, "learning_rate": 2e-07, "loss": 0.0161, "step": 200 }, { "clip_ratio/high_max": 0.0022332727239700034, "clip_ratio/high_mean": 0.0008888625179679366, "clip_ratio/low_mean": 0.0006720081582898274, "clip_ratio/low_min": 4.810716382053215e-05, "clip_ratio/region_mean": 0.0015608706744387746, "epoch": 0.01875995310011725, "grad_norm": 0.1197165995836258, "learning_rate": 2e-07, "loss": 0.0713, "step": 201 }, { "clip_ratio/high_max": 0.0021171529006096534, "clip_ratio/high_mean": 0.0009134918227573507, "clip_ratio/low_mean": 0.0006080609291529981, "clip_ratio/low_min": 4.032635115436278e-05, "clip_ratio/region_mean": 0.0015215527782856952, "epoch": 0.018853286200117832, "grad_norm": 0.12558649480342865, "learning_rate": 2e-07, "loss": 0.057, "step": 202 }, { "clip_ratio/high_max": 0.001816671861888608, "clip_ratio/high_mean": 0.0007296011772268685, "clip_ratio/low_mean": 0.0006923889523022808, "clip_ratio/low_min": 7.941850981296739e-05, "clip_ratio/region_mean": 0.0014219901386240963, "epoch": 0.018946619300118418, "grad_norm": 0.11568861454725266, "learning_rate": 2e-07, "loss": 0.0494, "step": 203 }, { "clip_ratio/high_max": 0.0021382658305810764, "clip_ratio/high_mean": 0.0008898287469492061, "clip_ratio/low_mean": 0.0005603459685517009, "clip_ratio/low_min": 6.999934339546598e-05, "clip_ratio/region_mean": 0.0014501747136819176, "epoch": 0.019039952400119, "grad_norm": 0.11714370548725128, "learning_rate": 2e-07, "loss": 0.0178, "step": 204 }, { "clip_ratio/high_max": 0.00179005935206078, "clip_ratio/high_mean": 0.00077575969407917, "clip_ratio/low_mean": 0.0006000151151965838, "clip_ratio/low_min": 7.392168663500343e-05, "clip_ratio/region_mean": 0.0013757748165517114, "epoch": 0.019133285500119585, "grad_norm": 0.125470370054245, "learning_rate": 2e-07, "loss": 0.0517, "step": 205 }, { "clip_ratio/high_max": 0.001654505012993468, "clip_ratio/high_mean": 0.0007185303638834739, "clip_ratio/low_mean": 0.0006323754241748247, "clip_ratio/low_min": 5.656904886564007e-05, "clip_ratio/region_mean": 0.0013509057862393092, "epoch": 0.019226618600120166, "grad_norm": 0.11505354195833206, "learning_rate": 2e-07, "loss": 0.0449, "step": 206 }, { "clip_ratio/high_max": 0.0017064742241927888, "clip_ratio/high_mean": 0.0007828243415133329, "clip_ratio/low_mean": 0.0006331469085125718, "clip_ratio/low_min": 6.827588549640495e-05, "clip_ratio/region_mean": 0.001415971255482873, "epoch": 0.019319951700120748, "grad_norm": 0.10872121155261993, "learning_rate": 2e-07, "loss": 0.0203, "step": 207 }, { "clip_ratio/high_max": 0.0020829406639677472, "clip_ratio/high_mean": 0.0009138266286754515, "clip_ratio/low_mean": 0.0005890915581403533, "clip_ratio/low_min": 1.3334755749383476e-05, "clip_ratio/region_mean": 0.001502918174082879, "epoch": 0.019413284800121333, "grad_norm": 0.11202314496040344, "learning_rate": 2e-07, "loss": 0.0047, "step": 208 }, { "clip_ratio/high_max": 0.002092193597491132, "clip_ratio/high_mean": 0.0007727962038188707, "clip_ratio/low_mean": 0.0005881238721485715, "clip_ratio/low_min": 1.8502072634873912e-05, "clip_ratio/region_mean": 0.0013609200468636118, "epoch": 0.019506617900121915, "grad_norm": 0.12486797571182251, "learning_rate": 2e-07, "loss": 0.0047, "step": 209 }, { "clip_ratio/high_max": 0.0021771567699033767, "clip_ratio/high_mean": 0.0008790578212938271, "clip_ratio/low_mean": 0.0006164459973660996, "clip_ratio/low_min": 5.278173648548545e-05, "clip_ratio/region_mean": 0.0014955038495827466, "epoch": 0.0195999510001225, "grad_norm": 0.12480311095714569, "learning_rate": 2e-07, "loss": 0.0349, "step": 210 }, { "clip_ratio/high_max": 0.002391279565927107, "clip_ratio/high_mean": 0.0008715305011719465, "clip_ratio/low_mean": 0.0006326712973532267, "clip_ratio/low_min": 2.483539628883591e-05, "clip_ratio/region_mean": 0.0015042017912492156, "epoch": 0.019693284100123082, "grad_norm": 0.12396135181188583, "learning_rate": 2e-07, "loss": -0.0096, "step": 211 }, { "clip_ratio/high_max": 0.001972062160348287, "clip_ratio/high_mean": 0.000841363966173958, "clip_ratio/low_mean": 0.0005959939471722464, "clip_ratio/low_min": 2.3692191462032497e-05, "clip_ratio/region_mean": 0.001437357896065805, "epoch": 0.019786617200123667, "grad_norm": 0.12970447540283203, "learning_rate": 2e-07, "loss": 0.0062, "step": 212 }, { "clip_ratio/high_max": 0.0020741428234032355, "clip_ratio/high_mean": 0.0009173517100862227, "clip_ratio/low_mean": 0.0006286472653300734, "clip_ratio/low_min": 5.0465879212424625e-05, "clip_ratio/region_mean": 0.0015459989808732644, "epoch": 0.01987995030012425, "grad_norm": 0.1226431280374527, "learning_rate": 2e-07, "loss": 0.0055, "step": 213 }, { "clip_ratio/high_max": 0.0019818857763311826, "clip_ratio/high_mean": 0.0008492987453792011, "clip_ratio/low_mean": 0.000629013577963633, "clip_ratio/low_min": 4.6589530938945245e-05, "clip_ratio/region_mean": 0.001478312329709297, "epoch": 0.019973283400124834, "grad_norm": 0.11172264814376831, "learning_rate": 2e-07, "loss": 0.0389, "step": 214 }, { "clip_ratio/high_max": 0.002069979236694053, "clip_ratio/high_mean": 0.000865060013893526, "clip_ratio/low_mean": 0.0005486258087330498, "clip_ratio/low_min": 2.34817125601694e-05, "clip_ratio/region_mean": 0.0014136857935227454, "epoch": 0.020066616500125416, "grad_norm": 0.12068255990743637, "learning_rate": 2e-07, "loss": 0.0359, "step": 215 }, { "clip_ratio/high_max": 0.0019663180137285963, "clip_ratio/high_mean": 0.0008473750822304282, "clip_ratio/low_mean": 0.0007438137436110992, "clip_ratio/low_min": 7.713813101872802e-05, "clip_ratio/region_mean": 0.0015911888549453579, "epoch": 0.020159949600126, "grad_norm": 0.12602101266384125, "learning_rate": 2e-07, "loss": 0.0466, "step": 216 }, { "clip_ratio/high_max": 0.0020827313237532508, "clip_ratio/high_mean": 0.0008149411751219304, "clip_ratio/low_mean": 0.0005110658830744796, "clip_ratio/low_min": 3.48286448570434e-05, "clip_ratio/region_mean": 0.0013260070591059048, "epoch": 0.020253282700126583, "grad_norm": 0.11434406042098999, "learning_rate": 2e-07, "loss": -0.0277, "step": 217 }, { "clip_ratio/high_max": 0.0019906498891941737, "clip_ratio/high_mean": 0.0009144391351583181, "clip_ratio/low_mean": 0.0006791747746319743, "clip_ratio/low_min": 1.315789450018201e-05, "clip_ratio/region_mean": 0.0015936139388941228, "epoch": 0.020346615800127168, "grad_norm": 0.12674777209758759, "learning_rate": 2e-07, "loss": 0.0021, "step": 218 }, { "clip_ratio/high_max": 0.0018587054801173508, "clip_ratio/high_mean": 0.0008060272903094301, "clip_ratio/low_mean": 0.0007025926606729627, "clip_ratio/low_min": 5.370995995690464e-05, "clip_ratio/region_mean": 0.0015086199418874457, "epoch": 0.02043994890012775, "grad_norm": 0.11981428414583206, "learning_rate": 2e-07, "loss": 0.048, "step": 219 }, { "clip_ratio/high_max": 0.002145351372746518, "clip_ratio/high_mean": 0.0009035408475028817, "clip_ratio/low_mean": 0.0006624667430514819, "clip_ratio/low_min": 3.271596688136924e-05, "clip_ratio/region_mean": 0.0015660075914638583, "epoch": 0.02053328200012833, "grad_norm": 0.12608177959918976, "learning_rate": 2e-07, "loss": -0.0036, "step": 220 }, { "clip_ratio/high_max": 0.0023064735723892227, "clip_ratio/high_mean": 0.0009404887641721871, "clip_ratio/low_mean": 0.0006884559879836161, "clip_ratio/low_min": 2.2530487967742374e-05, "clip_ratio/region_mean": 0.0016289447667077184, "epoch": 0.020626615100128916, "grad_norm": 0.13023404777050018, "learning_rate": 2e-07, "loss": 0.0106, "step": 221 }, { "clip_ratio/high_max": 0.0019826841926260386, "clip_ratio/high_mean": 0.0008284481846203562, "clip_ratio/low_mean": 0.0006198056271387031, "clip_ratio/low_min": 7.347455539274961e-05, "clip_ratio/region_mean": 0.001448253788112197, "epoch": 0.020719948200129498, "grad_norm": 0.130168154835701, "learning_rate": 2e-07, "loss": 0.0399, "step": 222 }, { "clip_ratio/high_max": 0.0017674262198852375, "clip_ratio/high_mean": 0.0007272366019606125, "clip_ratio/low_mean": 0.0005965747932350496, "clip_ratio/low_min": 2.0964268514944706e-05, "clip_ratio/region_mean": 0.0013238114006526303, "epoch": 0.020813281300130083, "grad_norm": 0.12362027168273926, "learning_rate": 2e-07, "loss": 0.0214, "step": 223 }, { "clip_ratio/high_max": 0.001974437616809155, "clip_ratio/high_mean": 0.0008042374165597721, "clip_ratio/low_mean": 0.0005606582571999752, "clip_ratio/low_min": 2.183406104450114e-05, "clip_ratio/region_mean": 0.0013648956883116625, "epoch": 0.020906614400130665, "grad_norm": 0.12424493581056595, "learning_rate": 2e-07, "loss": -0.006, "step": 224 }, { "clip_ratio/high_max": 0.0022053325810702518, "clip_ratio/high_mean": 0.0009020308098115493, "clip_ratio/low_mean": 0.0006508156320705893, "clip_ratio/low_min": 6.897086586832302e-05, "clip_ratio/region_mean": 0.0015528464136878029, "epoch": 0.02099994750013125, "grad_norm": 0.1252783089876175, "learning_rate": 2e-07, "loss": 0.0173, "step": 225 }, { "clip_ratio/high_max": 0.0017065121937775984, "clip_ratio/high_mean": 0.0006967523586354218, "clip_ratio/low_mean": 0.0007028692743915599, "clip_ratio/low_min": 7.727163165327511e-05, "clip_ratio/region_mean": 0.0013996216148370877, "epoch": 0.021093280600131832, "grad_norm": 0.12578731775283813, "learning_rate": 2e-07, "loss": 0.0664, "step": 226 }, { "clip_ratio/high_max": 0.0019523410956026055, "clip_ratio/high_mean": 0.0008163893962773727, "clip_ratio/low_mean": 0.0006067543345125159, "clip_ratio/low_min": 2.7364272682461888e-05, "clip_ratio/region_mean": 0.001423143741703825, "epoch": 0.021186613700132417, "grad_norm": 0.116533063352108, "learning_rate": 2e-07, "loss": 0.0355, "step": 227 }, { "clip_ratio/high_max": 0.0020167624861642253, "clip_ratio/high_mean": 0.0008077914826571941, "clip_ratio/low_mean": 0.0007490758625863236, "clip_ratio/low_min": 9.619301545171766e-05, "clip_ratio/region_mean": 0.0015568673479720019, "epoch": 0.021279946800133, "grad_norm": 0.12544459104537964, "learning_rate": 2e-07, "loss": 0.0459, "step": 228 }, { "clip_ratio/high_max": 0.002426959756121505, "clip_ratio/high_mean": 0.0010647100280039012, "clip_ratio/low_mean": 0.0007087066551321186, "clip_ratio/low_min": 5.299648728396278e-05, "clip_ratio/region_mean": 0.0017734167122398503, "epoch": 0.021373279900133584, "grad_norm": 0.1109149381518364, "learning_rate": 2e-07, "loss": -0.0055, "step": 229 }, { "clip_ratio/high_max": 0.0021228285440884065, "clip_ratio/high_mean": 0.0009695321532490198, "clip_ratio/low_mean": 0.0006713399761792971, "clip_ratio/low_min": 9.498201325186528e-05, "clip_ratio/region_mean": 0.001640872153075179, "epoch": 0.021466613000134166, "grad_norm": 0.1239905133843422, "learning_rate": 2e-07, "loss": -0.0334, "step": 230 }, { "clip_ratio/high_max": 0.002044136206677649, "clip_ratio/high_mean": 0.000907914338313276, "clip_ratio/low_mean": 0.0005890498614462558, "clip_ratio/low_min": 7.365920646407176e-05, "clip_ratio/region_mean": 0.0014969642215874046, "epoch": 0.02155994610013475, "grad_norm": 0.14024090766906738, "learning_rate": 2e-07, "loss": -0.0074, "step": 231 }, { "clip_ratio/high_max": 0.002295651338499738, "clip_ratio/high_mean": 0.0008678561080159852, "clip_ratio/low_mean": 0.0006381597686413443, "clip_ratio/low_min": 2.9640562388522085e-05, "clip_ratio/region_mean": 0.001506015865743393, "epoch": 0.021653279200135333, "grad_norm": 0.12719973921775818, "learning_rate": 2e-07, "loss": 0.0464, "step": 232 }, { "clip_ratio/high_max": 0.0018199050609837286, "clip_ratio/high_mean": 0.0007057171333144652, "clip_ratio/low_mean": 0.0005441948096631677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012499119475251064, "epoch": 0.021746612300135915, "grad_norm": 0.11361807584762573, "learning_rate": 2e-07, "loss": 0.0467, "step": 233 }, { "clip_ratio/high_max": 0.0018442862856318243, "clip_ratio/high_mean": 0.0008334170324815204, "clip_ratio/low_mean": 0.0006894128564454149, "clip_ratio/low_min": 5.868424705113284e-05, "clip_ratio/region_mean": 0.0015228298907459248, "epoch": 0.0218399454001365, "grad_norm": 0.11941342800855637, "learning_rate": 2e-07, "loss": 0.034, "step": 234 }, { "clip_ratio/high_max": 0.0023222617237479426, "clip_ratio/high_mean": 0.0008665252553328173, "clip_ratio/low_mean": 0.0006782530235796003, "clip_ratio/low_min": 5.201616841077339e-05, "clip_ratio/region_mean": 0.0015447782752744388, "epoch": 0.02193327850013708, "grad_norm": 0.12980884313583374, "learning_rate": 2e-07, "loss": 0.0232, "step": 235 }, { "clip_ratio/high_max": 0.002173518791096285, "clip_ratio/high_mean": 0.0008693928975844756, "clip_ratio/low_mean": 0.0005926298354097526, "clip_ratio/low_min": 3.802351511694724e-05, "clip_ratio/region_mean": 0.0014620227375417016, "epoch": 0.022026611600137667, "grad_norm": 0.11835992336273193, "learning_rate": 2e-07, "loss": 0.007, "step": 236 }, { "clip_ratio/high_max": 0.002036824640526902, "clip_ratio/high_mean": 0.0008447859781881562, "clip_ratio/low_mean": 0.0005561790476349415, "clip_ratio/low_min": 3.0056125979172066e-05, "clip_ratio/region_mean": 0.001400965040375013, "epoch": 0.02211994470013825, "grad_norm": 0.12677739560604095, "learning_rate": 2e-07, "loss": 0.0413, "step": 237 }, { "clip_ratio/high_max": 0.002024779278144706, "clip_ratio/high_mean": 0.0008605479779362213, "clip_ratio/low_mean": 0.000603546957790968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014640948866144754, "epoch": 0.022213277800138834, "grad_norm": 0.12738028168678284, "learning_rate": 2e-07, "loss": 0.003, "step": 238 }, { "clip_ratio/high_max": 0.0022869312233524397, "clip_ratio/high_mean": 0.0009744043636601418, "clip_ratio/low_mean": 0.0006235647360881558, "clip_ratio/low_min": 8.00459529273212e-05, "clip_ratio/region_mean": 0.0015979691233951598, "epoch": 0.022306610900139415, "grad_norm": 0.11834923923015594, "learning_rate": 2e-07, "loss": 0.0093, "step": 239 }, { "clip_ratio/high_max": 0.002176742971641943, "clip_ratio/high_mean": 0.0009521244392090011, "clip_ratio/low_mean": 0.0006671634837402962, "clip_ratio/low_min": 1.589117709954735e-05, "clip_ratio/region_mean": 0.0016192878865695093, "epoch": 0.02239994400014, "grad_norm": 0.11782267689704895, "learning_rate": 2e-07, "loss": 0.0234, "step": 240 }, { "clip_ratio/high_max": 0.0018703921305132098, "clip_ratio/high_mean": 0.0007780053701935685, "clip_ratio/low_mean": 0.0006393818130163709, "clip_ratio/low_min": 3.492377709335415e-05, "clip_ratio/region_mean": 0.0014173871895764023, "epoch": 0.022493277100140582, "grad_norm": 0.12588801980018616, "learning_rate": 2e-07, "loss": 0.0652, "step": 241 }, { "clip_ratio/high_max": 0.0018498727258702274, "clip_ratio/high_mean": 0.0008320328215631889, "clip_ratio/low_mean": 0.0006495431707662647, "clip_ratio/low_min": 2.240544927190058e-05, "clip_ratio/region_mean": 0.0014815760077908635, "epoch": 0.022586610200141168, "grad_norm": 0.12297002971172333, "learning_rate": 2e-07, "loss": 0.0517, "step": 242 }, { "clip_ratio/high_max": 0.0025394732219865546, "clip_ratio/high_mean": 0.0009917907082126476, "clip_ratio/low_mean": 0.0006418006814783439, "clip_ratio/low_min": 1.5122187505767215e-05, "clip_ratio/region_mean": 0.0016335914115188643, "epoch": 0.02267994330014175, "grad_norm": 0.1303921788930893, "learning_rate": 2e-07, "loss": 0.0123, "step": 243 }, { "clip_ratio/high_max": 0.0018134244055545423, "clip_ratio/high_mean": 0.0008182700239558471, "clip_ratio/low_mean": 0.0006754147943865974, "clip_ratio/low_min": 7.968447880557505e-05, "clip_ratio/region_mean": 0.0014936848237994127, "epoch": 0.022773276400142334, "grad_norm": 0.11415337771177292, "learning_rate": 2e-07, "loss": 0.065, "step": 244 }, { "clip_ratio/high_max": 0.0019734601955860853, "clip_ratio/high_mean": 0.0007973790052346885, "clip_ratio/low_mean": 0.0007755887327220989, "clip_ratio/low_min": 0.00012020656322420109, "clip_ratio/region_mean": 0.0015729677324998192, "epoch": 0.022866609500142916, "grad_norm": 0.12819279730319977, "learning_rate": 2e-07, "loss": 0.0787, "step": 245 }, { "clip_ratio/high_max": 0.0021143411067896523, "clip_ratio/high_mean": 0.0008259684655058663, "clip_ratio/low_mean": 0.0007045072779874317, "clip_ratio/low_min": 2.680677607713733e-05, "clip_ratio/region_mean": 0.0015304757507692557, "epoch": 0.022959942600143498, "grad_norm": 0.11841915547847748, "learning_rate": 2e-07, "loss": 0.0555, "step": 246 }, { "clip_ratio/high_max": 0.00227278806414688, "clip_ratio/high_mean": 0.0009006211002997588, "clip_ratio/low_mean": 0.0006637595542997587, "clip_ratio/low_min": 1.3397641851042863e-05, "clip_ratio/region_mean": 0.0015643806364096235, "epoch": 0.023053275700144083, "grad_norm": 0.13094264268875122, "learning_rate": 2e-07, "loss": 0.001, "step": 247 }, { "clip_ratio/high_max": 0.0017917007025971543, "clip_ratio/high_mean": 0.0008561914746678667, "clip_ratio/low_mean": 0.0006776778118364746, "clip_ratio/low_min": 9.029080320033245e-05, "clip_ratio/region_mean": 0.0015338692792283837, "epoch": 0.023146608800144665, "grad_norm": 0.13803403079509735, "learning_rate": 2e-07, "loss": 0.0164, "step": 248 }, { "clip_ratio/high_max": 0.002043408567260485, "clip_ratio/high_mean": 0.0009841881419561105, "clip_ratio/low_mean": 0.0006766336173313903, "clip_ratio/low_min": 3.073063726333203e-05, "clip_ratio/region_mean": 0.001660821755649522, "epoch": 0.02323994190014525, "grad_norm": 0.11938658356666565, "learning_rate": 2e-07, "loss": 0.0092, "step": 249 }, { "clip_ratio/high_max": 0.001939498797582928, "clip_ratio/high_mean": 0.0008849788864608854, "clip_ratio/low_mean": 0.0006844468753115507, "clip_ratio/low_min": 7.482086039090063e-05, "clip_ratio/region_mean": 0.0015694257599534467, "epoch": 0.023333275000145832, "grad_norm": 0.13686634600162506, "learning_rate": 2e-07, "loss": -0.0155, "step": 250 }, { "clip_ratio/high_max": 0.0020375457461341284, "clip_ratio/high_mean": 0.0008434641786152497, "clip_ratio/low_mean": 0.000673469254252268, "clip_ratio/low_min": 6.995611875026952e-05, "clip_ratio/region_mean": 0.0015169333855737932, "epoch": 0.023426608100146417, "grad_norm": 0.12419265508651733, "learning_rate": 2e-07, "loss": 0.0366, "step": 251 }, { "clip_ratio/high_max": 0.001976864852622384, "clip_ratio/high_mean": 0.0007229638449643971, "clip_ratio/low_mean": 0.0006403792831406463, "clip_ratio/low_min": 3.325263060105499e-05, "clip_ratio/region_mean": 0.0013633431335620116, "epoch": 0.023519941200147, "grad_norm": 0.13820812106132507, "learning_rate": 2e-07, "loss": 0.0486, "step": 252 }, { "clip_ratio/high_max": 0.0022484115324914455, "clip_ratio/high_mean": 0.0009520504554529907, "clip_ratio/low_mean": 0.0006318347568594618, "clip_ratio/low_min": 2.1632093194057234e-05, "clip_ratio/region_mean": 0.0015838852123124525, "epoch": 0.023613274300147584, "grad_norm": 0.11094287782907486, "learning_rate": 2e-07, "loss": -0.0021, "step": 253 }, { "clip_ratio/high_max": 0.0020970325131202117, "clip_ratio/high_mean": 0.0009178951549984049, "clip_ratio/low_mean": 0.0006947758975002216, "clip_ratio/low_min": 8.198030263883993e-05, "clip_ratio/region_mean": 0.0016126710834214464, "epoch": 0.023706607400148166, "grad_norm": 0.13793335855007172, "learning_rate": 2e-07, "loss": 0.0663, "step": 254 }, { "clip_ratio/high_max": 0.002247452164738206, "clip_ratio/high_mean": 0.0009144013238255866, "clip_ratio/low_mean": 0.0005964958008917165, "clip_ratio/low_min": 4.1346617308590794e-05, "clip_ratio/region_mean": 0.00151089714927366, "epoch": 0.02379994050014875, "grad_norm": 0.12533582746982574, "learning_rate": 2e-07, "loss": 0.0329, "step": 255 }, { "clip_ratio/high_max": 0.002352266419620719, "clip_ratio/high_mean": 0.0010341808338125702, "clip_ratio/low_mean": 0.0006419627470677369, "clip_ratio/low_min": 1.1469994205981493e-05, "clip_ratio/region_mean": 0.0016761435545049608, "epoch": 0.023893273600149333, "grad_norm": 0.11967840790748596, "learning_rate": 2e-07, "loss": -0.0008, "step": 256 }, { "clip_ratio/high_max": 0.0018839719341485761, "clip_ratio/high_mean": 0.0007135008636396378, "clip_ratio/low_mean": 0.0005426309635367943, "clip_ratio/low_min": 8.57927261677105e-06, "clip_ratio/region_mean": 0.001256131839909358, "completions/clipped_ratio": 0.0160609654017857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 624.572998046875, "completions/mean_terminated_length": 567.9083862304688, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.023986606700149918, "grad_norm": 0.11500854045152664, "learning_rate": 2e-07, "loss": 0.0634, "num_tokens": 245222416.0, "reward": 0.56463623046875, "reward_std": 0.20198029279708862, "rewards/simpleverify_reward/mean": 0.56463623046875, "rewards/simpleverify_reward/std": 0.49580687284469604, "step": 257 }, { "clip_ratio/high_max": 0.0020199282880639657, "clip_ratio/high_mean": 0.0008201083528547315, "clip_ratio/low_mean": 0.00047565761997248046, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012957659782841802, "epoch": 0.0240799398001505, "grad_norm": 0.1208253875374794, "learning_rate": 2e-07, "loss": -0.0123, "step": 258 }, { "clip_ratio/high_max": 0.001963495298696216, "clip_ratio/high_mean": 0.0007769777894282015, "clip_ratio/low_mean": 0.0006154828006401658, "clip_ratio/low_min": 3.069697959290352e-05, "clip_ratio/region_mean": 0.001392460588249378, "epoch": 0.02417327290015108, "grad_norm": 0.11618202924728394, "learning_rate": 2e-07, "loss": 0.0545, "step": 259 }, { "clip_ratio/high_max": 0.001976386509340955, "clip_ratio/high_mean": 0.0008198244486266049, "clip_ratio/low_mean": 0.0005461222081066808, "clip_ratio/low_min": 2.5431217181903776e-05, "clip_ratio/region_mean": 0.0013659466712852009, "epoch": 0.024266606000151666, "grad_norm": 0.11154364049434662, "learning_rate": 2e-07, "loss": 0.028, "step": 260 }, { "clip_ratio/high_max": 0.0018495485601306427, "clip_ratio/high_mean": 0.000679276070513879, "clip_ratio/low_mean": 0.0005829149340570439, "clip_ratio/low_min": 7.651429677935084e-05, "clip_ratio/region_mean": 0.0012621909845620394, "epoch": 0.024359939100152248, "grad_norm": 0.4491668939590454, "learning_rate": 2e-07, "loss": 0.0628, "step": 261 }, { "clip_ratio/high_max": 0.0018992861478182022, "clip_ratio/high_mean": 0.0006883132709845086, "clip_ratio/low_mean": 0.0006296134815784171, "clip_ratio/low_min": 4.2889173528237734e-05, "clip_ratio/region_mean": 0.0013179267371015158, "epoch": 0.024453272200152833, "grad_norm": 0.11347917467355728, "learning_rate": 2e-07, "loss": 0.0586, "step": 262 }, { "clip_ratio/high_max": 0.0017797858381527476, "clip_ratio/high_mean": 0.0007452798672602512, "clip_ratio/low_mean": 0.000546596672393207, "clip_ratio/low_min": 3.882692544721067e-05, "clip_ratio/region_mean": 0.0012918765569338575, "epoch": 0.024546605300153415, "grad_norm": 0.11700635403394699, "learning_rate": 2e-07, "loss": 0.0273, "step": 263 }, { "clip_ratio/high_max": 0.0018411456439935137, "clip_ratio/high_mean": 0.0007784780245856382, "clip_ratio/low_mean": 0.000527014772160328, "clip_ratio/low_min": 3.753527744265739e-05, "clip_ratio/region_mean": 0.0013054928167548496, "epoch": 0.024639938400154, "grad_norm": 0.12003087252378464, "learning_rate": 2e-07, "loss": 0.0119, "step": 264 }, { "clip_ratio/high_max": 0.0022165566369949374, "clip_ratio/high_mean": 0.0008499307905367459, "clip_ratio/low_mean": 0.0005198766684770817, "clip_ratio/low_min": 6.738482807122637e-05, "clip_ratio/region_mean": 0.0013698074690182693, "epoch": 0.024733271500154582, "grad_norm": 0.12093736976385117, "learning_rate": 2e-07, "loss": 0.0082, "step": 265 }, { "clip_ratio/high_max": 0.0018259815697092563, "clip_ratio/high_mean": 0.0007838963738322491, "clip_ratio/low_mean": 0.0006334642166621052, "clip_ratio/low_min": 7.806645044183824e-06, "clip_ratio/region_mean": 0.0014173605595715344, "epoch": 0.024826604600155167, "grad_norm": 0.12317376583814621, "learning_rate": 2e-07, "loss": 0.0203, "step": 266 }, { "clip_ratio/high_max": 0.0022587031780858524, "clip_ratio/high_mean": 0.0008465351620543515, "clip_ratio/low_mean": 0.0005003897140340996, "clip_ratio/low_min": 5.588116619037464e-05, "clip_ratio/region_mean": 0.0013469249061017763, "epoch": 0.02491993770015575, "grad_norm": 0.13149158656597137, "learning_rate": 2e-07, "loss": 0.0193, "step": 267 }, { "clip_ratio/high_max": 0.00210473397601163, "clip_ratio/high_mean": 0.0007792920096107991, "clip_ratio/low_mean": 0.0005549281613639323, "clip_ratio/low_min": 4.625537621905096e-05, "clip_ratio/region_mean": 0.0013342201673367526, "epoch": 0.025013270800156334, "grad_norm": 0.11912455409765244, "learning_rate": 2e-07, "loss": 0.0332, "step": 268 }, { "clip_ratio/high_max": 0.0020448992763704155, "clip_ratio/high_mean": 0.0007903182545305754, "clip_ratio/low_mean": 0.0005470241758303018, "clip_ratio/low_min": 1.4524750440614298e-05, "clip_ratio/region_mean": 0.0013373424408200663, "epoch": 0.025106603900156916, "grad_norm": 0.11905215680599213, "learning_rate": 2e-07, "loss": 0.0681, "step": 269 }, { "clip_ratio/high_max": 0.0014461008613579907, "clip_ratio/high_mean": 0.0006477457636719919, "clip_ratio/low_mean": 0.0006582269779755734, "clip_ratio/low_min": 3.421683322812896e-05, "clip_ratio/region_mean": 0.0013059727461950388, "epoch": 0.0251999370001575, "grad_norm": 0.11372499167919159, "learning_rate": 2e-07, "loss": 0.0561, "step": 270 }, { "clip_ratio/high_max": 0.0017878697617561556, "clip_ratio/high_mean": 0.0007543820247519761, "clip_ratio/low_mean": 0.0005058662263763836, "clip_ratio/low_min": 3.363247105880873e-05, "clip_ratio/region_mean": 0.0012602482420334127, "epoch": 0.025293270100158083, "grad_norm": 0.1179540678858757, "learning_rate": 2e-07, "loss": 0.0371, "step": 271 }, { "clip_ratio/high_max": 0.0017493020968686324, "clip_ratio/high_mean": 0.0007540766355305095, "clip_ratio/low_mean": 0.0005946931278231204, "clip_ratio/low_min": 7.5796783221449e-05, "clip_ratio/region_mean": 0.0013487697469827253, "epoch": 0.025386603200158665, "grad_norm": 0.12303470820188522, "learning_rate": 2e-07, "loss": 0.0453, "step": 272 }, { "clip_ratio/high_max": 0.0018768651243590284, "clip_ratio/high_mean": 0.0007730286706646439, "clip_ratio/low_mean": 0.0006108778288762551, "clip_ratio/low_min": 2.951593887701165e-05, "clip_ratio/region_mean": 0.0013839064704370685, "epoch": 0.02547993630015925, "grad_norm": 0.13033203780651093, "learning_rate": 2e-07, "loss": 0.0211, "step": 273 }, { "clip_ratio/high_max": 0.00197576292703161, "clip_ratio/high_mean": 0.0009061270029633306, "clip_ratio/low_mean": 0.0005172456221771426, "clip_ratio/low_min": 2.4377088266192004e-05, "clip_ratio/region_mean": 0.0014233726142265368, "epoch": 0.02557326940015983, "grad_norm": 0.12400084733963013, "learning_rate": 2e-07, "loss": -0.0233, "step": 274 }, { "clip_ratio/high_max": 0.0020519799400062766, "clip_ratio/high_mean": 0.0007785065954521997, "clip_ratio/low_mean": 0.0004813198793272022, "clip_ratio/low_min": 2.377148121013306e-05, "clip_ratio/region_mean": 0.0012598264547705185, "epoch": 0.025666602500160417, "grad_norm": 0.1072271540760994, "learning_rate": 2e-07, "loss": 0.019, "step": 275 }, { "clip_ratio/high_max": 0.0017076392396120355, "clip_ratio/high_mean": 0.0007884213264333084, "clip_ratio/low_mean": 0.000621120158029953, "clip_ratio/low_min": 5.097129178466275e-05, "clip_ratio/region_mean": 0.0014095414844632614, "epoch": 0.025759935600161, "grad_norm": 0.13034513592720032, "learning_rate": 2e-07, "loss": 0.0026, "step": 276 }, { "clip_ratio/high_max": 0.001769192298525013, "clip_ratio/high_mean": 0.0007256751177919796, "clip_ratio/low_mean": 0.0006207737815202563, "clip_ratio/low_min": 3.349593316670507e-05, "clip_ratio/region_mean": 0.0013464488984027412, "epoch": 0.025853268700161584, "grad_norm": 0.12212289869785309, "learning_rate": 2e-07, "loss": 0.0235, "step": 277 }, { "clip_ratio/high_max": 0.0020236209165886976, "clip_ratio/high_mean": 0.0007744238591840258, "clip_ratio/low_mean": 0.0006517259444080992, "clip_ratio/low_min": 4.1901329495885875e-05, "clip_ratio/region_mean": 0.0014261497926781885, "epoch": 0.025946601800162165, "grad_norm": 0.12455925345420837, "learning_rate": 2e-07, "loss": 0.0616, "step": 278 }, { "clip_ratio/high_max": 0.00199477250862401, "clip_ratio/high_mean": 0.0008391673800360877, "clip_ratio/low_mean": 0.0005476566584547982, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013868240384908859, "epoch": 0.02603993490016275, "grad_norm": 0.11874571442604065, "learning_rate": 2e-07, "loss": 0.0277, "step": 279 }, { "clip_ratio/high_max": 0.0021788684753119014, "clip_ratio/high_mean": 0.0008542817795387236, "clip_ratio/low_mean": 0.0005357002028176794, "clip_ratio/low_min": 2.6899075237452053e-05, "clip_ratio/region_mean": 0.0013899819750804454, "epoch": 0.026133268000163332, "grad_norm": 0.12170732766389847, "learning_rate": 2e-07, "loss": 0.0168, "step": 280 }, { "clip_ratio/high_max": 0.0020501477592915762, "clip_ratio/high_mean": 0.000721589800377842, "clip_ratio/low_mean": 0.0006146431642264361, "clip_ratio/low_min": 7.955637329359888e-05, "clip_ratio/region_mean": 0.0013362329518713523, "epoch": 0.026226601100163918, "grad_norm": 0.12555696070194244, "learning_rate": 2e-07, "loss": 0.0949, "step": 281 }, { "clip_ratio/high_max": 0.0022244991560000926, "clip_ratio/high_mean": 0.0008334208596352255, "clip_ratio/low_mean": 0.000513205976858444, "clip_ratio/low_min": 5.205479101277888e-05, "clip_ratio/region_mean": 0.0013466268428601325, "epoch": 0.0263199342001645, "grad_norm": 0.11201972514390945, "learning_rate": 2e-07, "loss": -0.0102, "step": 282 }, { "clip_ratio/high_max": 0.0019736876784008928, "clip_ratio/high_mean": 0.000796609245298896, "clip_ratio/low_mean": 0.0005864941977051785, "clip_ratio/low_min": 4.224031090416247e-05, "clip_ratio/region_mean": 0.0013831034484610427, "epoch": 0.026413267300165084, "grad_norm": 0.12162583321332932, "learning_rate": 2e-07, "loss": 0.0265, "step": 283 }, { "clip_ratio/high_max": 0.002068604364467319, "clip_ratio/high_mean": 0.000845280812427518, "clip_ratio/low_mean": 0.0005790795057691867, "clip_ratio/low_min": 1.198236168420408e-05, "clip_ratio/region_mean": 0.0014243603254726622, "epoch": 0.026506600400165666, "grad_norm": 0.11316550523042679, "learning_rate": 2e-07, "loss": 0.0104, "step": 284 }, { "clip_ratio/high_max": 0.002168106057069963, "clip_ratio/high_mean": 0.0008575835363444639, "clip_ratio/low_mean": 0.00046572769178965245, "clip_ratio/low_min": 1.3009991562284995e-05, "clip_ratio/region_mean": 0.001323311236774316, "epoch": 0.026599933500166248, "grad_norm": 0.09966593235731125, "learning_rate": 2e-07, "loss": -0.0008, "step": 285 }, { "clip_ratio/high_max": 0.0022890284017194062, "clip_ratio/high_mean": 0.0007924565652501769, "clip_ratio/low_mean": 0.000595366280322196, "clip_ratio/low_min": 6.697939079458592e-05, "clip_ratio/region_mean": 0.001387822841934394, "epoch": 0.026693266600166833, "grad_norm": 0.11711356043815613, "learning_rate": 2e-07, "loss": -0.0254, "step": 286 }, { "clip_ratio/high_max": 0.0020460956075112335, "clip_ratio/high_mean": 0.0009111475010286085, "clip_ratio/low_mean": 0.0005603526178674656, "clip_ratio/low_min": 4.2565307467157254e-05, "clip_ratio/region_mean": 0.0014715000688738655, "epoch": 0.026786599700167415, "grad_norm": 0.12362494319677353, "learning_rate": 2e-07, "loss": -0.0175, "step": 287 }, { "clip_ratio/high_max": 0.002082897630316438, "clip_ratio/high_mean": 0.0008626818107586587, "clip_ratio/low_mean": 0.0006432107165892376, "clip_ratio/low_min": 6.818192377977539e-05, "clip_ratio/region_mean": 0.0015058925091580022, "epoch": 0.026879932800168, "grad_norm": 0.13926564157009125, "learning_rate": 2e-07, "loss": 0.0225, "step": 288 }, { "clip_ratio/high_max": 0.0016591042767686304, "clip_ratio/high_mean": 0.0007356801124842605, "clip_ratio/low_mean": 0.0006420888676075265, "clip_ratio/low_min": 7.559329151263228e-05, "clip_ratio/region_mean": 0.0013777689782727975, "epoch": 0.026973265900168582, "grad_norm": 0.11378282308578491, "learning_rate": 2e-07, "loss": 0.0273, "step": 289 }, { "clip_ratio/high_max": 0.001975459399545798, "clip_ratio/high_mean": 0.0007609944350406295, "clip_ratio/low_mean": 0.0006458952648245031, "clip_ratio/low_min": 8.118589175865054e-05, "clip_ratio/region_mean": 0.0014068896780372597, "epoch": 0.027066599000169167, "grad_norm": 0.12239658087491989, "learning_rate": 2e-07, "loss": 0.0178, "step": 290 }, { "clip_ratio/high_max": 0.0019237046362832189, "clip_ratio/high_mean": 0.00080548742243991, "clip_ratio/low_mean": 0.0005979554644000018, "clip_ratio/low_min": 2.6338419047533534e-05, "clip_ratio/region_mean": 0.0014034428786544595, "epoch": 0.02715993210016975, "grad_norm": 0.12757523357868195, "learning_rate": 2e-07, "loss": 0.0361, "step": 291 }, { "clip_ratio/high_max": 0.002089562356559327, "clip_ratio/high_mean": 0.0008999970268632751, "clip_ratio/low_mean": 0.0005720423478123848, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001472039395594038, "epoch": 0.027253265200170334, "grad_norm": 0.13246272504329681, "learning_rate": 2e-07, "loss": -0.0256, "step": 292 }, { "clip_ratio/high_max": 0.0019675435578392353, "clip_ratio/high_mean": 0.0008336705650435761, "clip_ratio/low_mean": 0.0006249542948353337, "clip_ratio/low_min": 1.942260769283166e-05, "clip_ratio/region_mean": 0.0014586248762498144, "epoch": 0.027346598300170916, "grad_norm": 0.1290530413389206, "learning_rate": 2e-07, "loss": 0.0003, "step": 293 }, { "clip_ratio/high_max": 0.001981994937523268, "clip_ratio/high_mean": 0.0008153691960615106, "clip_ratio/low_mean": 0.0006392068844434107, "clip_ratio/low_min": 5.741790300817229e-05, "clip_ratio/region_mean": 0.0014545760714099742, "epoch": 0.0274399314001715, "grad_norm": 0.12548260390758514, "learning_rate": 2e-07, "loss": 0.0588, "step": 294 }, { "clip_ratio/high_max": 0.002162057840905618, "clip_ratio/high_mean": 0.0008298467564600287, "clip_ratio/low_mean": 0.0005505874732989469, "clip_ratio/low_min": 5.910718118684599e-05, "clip_ratio/region_mean": 0.0013804342488583643, "epoch": 0.027533264500172083, "grad_norm": 0.11832530796527863, "learning_rate": 2e-07, "loss": -0.0042, "step": 295 }, { "clip_ratio/high_max": 0.001881218639027793, "clip_ratio/high_mean": 0.0007035325634205947, "clip_ratio/low_mean": 0.0006120294356151135, "clip_ratio/low_min": 3.050314262509346e-05, "clip_ratio/region_mean": 0.0013155620035831816, "epoch": 0.027626597600172668, "grad_norm": 0.11982741206884384, "learning_rate": 2e-07, "loss": 0.0259, "step": 296 }, { "clip_ratio/high_max": 0.0019271934943390079, "clip_ratio/high_mean": 0.000762846093493863, "clip_ratio/low_mean": 0.0004931095863867085, "clip_ratio/low_min": 1.4945002476451918e-05, "clip_ratio/region_mean": 0.0012559556344058365, "epoch": 0.02771993070017325, "grad_norm": 0.12474443763494492, "learning_rate": 2e-07, "loss": -0.0155, "step": 297 }, { "clip_ratio/high_max": 0.0017164243763545528, "clip_ratio/high_mean": 0.0007956518056744244, "clip_ratio/low_mean": 0.0006176739188958891, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014133257500361651, "epoch": 0.02781326380017383, "grad_norm": 0.12053424119949341, "learning_rate": 2e-07, "loss": 0.0178, "step": 298 }, { "clip_ratio/high_max": 0.002117894357070327, "clip_ratio/high_mean": 0.0008695551005075686, "clip_ratio/low_mean": 0.0005955539336355287, "clip_ratio/low_min": 5.094632888358319e-05, "clip_ratio/region_mean": 0.0014651090059487615, "epoch": 0.027906596900174416, "grad_norm": 0.12493390589952469, "learning_rate": 2e-07, "loss": -0.0111, "step": 299 }, { "clip_ratio/high_max": 0.0017723021519486792, "clip_ratio/high_mean": 0.0007359522596743773, "clip_ratio/low_mean": 0.0005927807542320807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001328733007539995, "epoch": 0.027999930000174998, "grad_norm": 0.11270361393690109, "learning_rate": 2e-07, "loss": 0.027, "step": 300 }, { "clip_ratio/high_max": 0.001885232632048428, "clip_ratio/high_mean": 0.0007097050356605905, "clip_ratio/low_mean": 0.000500872033626365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012105771129427012, "epoch": 0.028093263100175583, "grad_norm": 0.11546891182661057, "learning_rate": 2e-07, "loss": -0.0078, "step": 301 }, { "clip_ratio/high_max": 0.0020113050850341097, "clip_ratio/high_mean": 0.0007935145658848342, "clip_ratio/low_mean": 0.0006199763811309822, "clip_ratio/low_min": 4.739719042845536e-05, "clip_ratio/region_mean": 0.0014134909397398587, "epoch": 0.028186596200176165, "grad_norm": 0.11855800449848175, "learning_rate": 2e-07, "loss": 0.0093, "step": 302 }, { "clip_ratio/high_max": 0.0016922742797760293, "clip_ratio/high_mean": 0.0007418187815346755, "clip_ratio/low_mean": 0.000576145540435391, "clip_ratio/low_min": 8.622129644209053e-05, "clip_ratio/region_mean": 0.0013179643246985506, "epoch": 0.02827992930017675, "grad_norm": 0.11983189731836319, "learning_rate": 2e-07, "loss": 0.0435, "step": 303 }, { "clip_ratio/high_max": 0.0017247748328372836, "clip_ratio/high_mean": 0.0006810644117649645, "clip_ratio/low_mean": 0.0006676435987174045, "clip_ratio/low_min": 1.668891854933463e-05, "clip_ratio/region_mean": 0.001348708010482369, "epoch": 0.028373262400177332, "grad_norm": 0.1171589195728302, "learning_rate": 2e-07, "loss": 0.0802, "step": 304 }, { "clip_ratio/high_max": 0.0017667813444859348, "clip_ratio/high_mean": 0.0007061848082230426, "clip_ratio/low_mean": 0.0006128011073087691, "clip_ratio/low_min": 7.494501551263966e-05, "clip_ratio/region_mean": 0.0013189858982514124, "epoch": 0.028466595500177917, "grad_norm": 0.1098601296544075, "learning_rate": 2e-07, "loss": 0.0461, "step": 305 }, { "clip_ratio/high_max": 0.0019367154745850712, "clip_ratio/high_mean": 0.0007358881030086195, "clip_ratio/low_mean": 0.0005631079966406105, "clip_ratio/low_min": 5.482938468048815e-05, "clip_ratio/region_mean": 0.0012989960923732724, "epoch": 0.0285599286001785, "grad_norm": 0.14602519571781158, "learning_rate": 2e-07, "loss": 0.0485, "step": 306 }, { "clip_ratio/high_max": 0.001934049476403743, "clip_ratio/high_mean": 0.0007644662800885271, "clip_ratio/low_mean": 0.0005840450530740782, "clip_ratio/low_min": 3.194714190613013e-05, "clip_ratio/region_mean": 0.0013485113558999728, "epoch": 0.028653261700179084, "grad_norm": 0.11243382096290588, "learning_rate": 2e-07, "loss": 0.0134, "step": 307 }, { "clip_ratio/high_max": 0.0018117145846190397, "clip_ratio/high_mean": 0.0007272939483300433, "clip_ratio/low_mean": 0.000588181259445264, "clip_ratio/low_min": 2.4352177206310444e-05, "clip_ratio/region_mean": 0.0013154752232367173, "epoch": 0.028746594800179666, "grad_norm": 0.11952472478151321, "learning_rate": 2e-07, "loss": 0.031, "step": 308 }, { "clip_ratio/high_max": 0.0019460992734821048, "clip_ratio/high_mean": 0.0007769055373501033, "clip_ratio/low_mean": 0.0005817851870233426, "clip_ratio/low_min": 8.295551469927887e-05, "clip_ratio/region_mean": 0.0013586907080025412, "epoch": 0.02883992790018025, "grad_norm": 0.12298327684402466, "learning_rate": 2e-07, "loss": 0.0289, "step": 309 }, { "clip_ratio/high_max": 0.002010919059102889, "clip_ratio/high_mean": 0.0008190365260816179, "clip_ratio/low_mean": 0.0006089615490054712, "clip_ratio/low_min": 5.207685626373859e-05, "clip_ratio/region_mean": 0.0014279980823630467, "epoch": 0.028933261000180833, "grad_norm": 0.11988940089941025, "learning_rate": 2e-07, "loss": 0.0283, "step": 310 }, { "clip_ratio/high_max": 0.0017923747000168078, "clip_ratio/high_mean": 0.0007273753617482726, "clip_ratio/low_mean": 0.0006182983888720628, "clip_ratio/low_min": 2.9629219170601573e-05, "clip_ratio/region_mean": 0.0013456737251544837, "epoch": 0.029026594100181415, "grad_norm": 0.11644458770751953, "learning_rate": 2e-07, "loss": 0.0216, "step": 311 }, { "clip_ratio/high_max": 0.00213154007360572, "clip_ratio/high_mean": 0.0008717468808754347, "clip_ratio/low_mean": 0.000591704045291408, "clip_ratio/low_min": 4.444415844773175e-05, "clip_ratio/region_mean": 0.0014634509207098745, "epoch": 0.029119927200182, "grad_norm": 0.1220645010471344, "learning_rate": 2e-07, "loss": 0.0159, "step": 312 }, { "clip_ratio/high_max": 0.0018709412033786066, "clip_ratio/high_mean": 0.0007958844744280213, "clip_ratio/low_mean": 0.000547395866306033, "clip_ratio/low_min": 5.651607898471411e-05, "clip_ratio/region_mean": 0.0013432803862087894, "epoch": 0.02921326030018258, "grad_norm": 0.12182912975549698, "learning_rate": 2e-07, "loss": 0.0179, "step": 313 }, { "clip_ratio/high_max": 0.0021812400191265624, "clip_ratio/high_mean": 0.0008657605922053335, "clip_ratio/low_mean": 0.0006336235774142551, "clip_ratio/low_min": 2.370547372265719e-05, "clip_ratio/region_mean": 0.0014993842123658396, "epoch": 0.029306593400183167, "grad_norm": 0.13201750814914703, "learning_rate": 2e-07, "loss": -0.0119, "step": 314 }, { "clip_ratio/high_max": 0.0018607751626404934, "clip_ratio/high_mean": 0.0008000515354069648, "clip_ratio/low_mean": 0.0005376875606089015, "clip_ratio/low_min": 2.385868447163375e-05, "clip_ratio/region_mean": 0.0013377391223912127, "epoch": 0.02939992650018375, "grad_norm": 0.1170421838760376, "learning_rate": 2e-07, "loss": -0.0161, "step": 315 }, { "clip_ratio/high_max": 0.001861179025581805, "clip_ratio/high_mean": 0.0007701203685428482, "clip_ratio/low_mean": 0.00055040425468178, "clip_ratio/low_min": 2.3599748601554893e-05, "clip_ratio/region_mean": 0.001320524654147448, "epoch": 0.029493259600184334, "grad_norm": 0.1363784670829773, "learning_rate": 2e-07, "loss": 0.061, "step": 316 }, { "clip_ratio/high_max": 0.0016596122513874434, "clip_ratio/high_mean": 0.0006897079992995714, "clip_ratio/low_mean": 0.0006067104877729435, "clip_ratio/low_min": 2.9315197025425732e-05, "clip_ratio/region_mean": 0.0012964184752490837, "epoch": 0.029586592700184915, "grad_norm": 0.12399421632289886, "learning_rate": 2e-07, "loss": 0.052, "step": 317 }, { "clip_ratio/high_max": 0.001756465331709478, "clip_ratio/high_mean": 0.0007147569340304472, "clip_ratio/low_mean": 0.0006465763053711271, "clip_ratio/low_min": 2.2465852453024127e-05, "clip_ratio/region_mean": 0.0013613332375825848, "epoch": 0.0296799258001855, "grad_norm": 0.1299685686826706, "learning_rate": 2e-07, "loss": 0.0476, "step": 318 }, { "clip_ratio/high_max": 0.0018803434613801073, "clip_ratio/high_mean": 0.000824014428872033, "clip_ratio/low_mean": 0.0006059326715330826, "clip_ratio/low_min": 5.092688661534339e-05, "clip_ratio/region_mean": 0.0014299470785772428, "epoch": 0.029773258900186082, "grad_norm": 0.11528966575860977, "learning_rate": 2e-07, "loss": -0.0083, "step": 319 }, { "clip_ratio/high_max": 0.001972535806999076, "clip_ratio/high_mean": 0.0008278733785118675, "clip_ratio/low_mean": 0.0006067261183488881, "clip_ratio/low_min": 3.0108059036138002e-05, "clip_ratio/region_mean": 0.001434599471394904, "epoch": 0.029866592000186667, "grad_norm": 0.11708053946495056, "learning_rate": 2e-07, "loss": 0.0415, "step": 320 }, { "clip_ratio/high_max": 0.0020763563370564952, "clip_ratio/high_mean": 0.0008158107484632637, "clip_ratio/low_mean": 0.0006800573701184476, "clip_ratio/low_min": 5.878196043340722e-05, "clip_ratio/region_mean": 0.0014958680803829338, "epoch": 0.02995992510018725, "grad_norm": 0.13091234862804413, "learning_rate": 2e-07, "loss": 0.076, "step": 321 }, { "clip_ratio/high_max": 0.002138528216164559, "clip_ratio/high_mean": 0.0008708736786502413, "clip_ratio/low_mean": 0.0005694980063708499, "clip_ratio/low_min": 2.0395849332999205e-05, "clip_ratio/region_mean": 0.0014403717177629005, "epoch": 0.030053258200187834, "grad_norm": 0.11137208342552185, "learning_rate": 2e-07, "loss": 0.0081, "step": 322 }, { "clip_ratio/high_max": 0.002068605197564466, "clip_ratio/high_mean": 0.0008424160223512445, "clip_ratio/low_mean": 0.0005714419839932816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014138580081635155, "epoch": 0.030146591300188416, "grad_norm": 0.12551407516002655, "learning_rate": 2e-07, "loss": 0.0279, "step": 323 }, { "clip_ratio/high_max": 0.0018103445327142254, "clip_ratio/high_mean": 0.0007343616525758989, "clip_ratio/low_mean": 0.0006544732777911122, "clip_ratio/low_min": 2.798298555717338e-05, "clip_ratio/region_mean": 0.0013888349203625694, "epoch": 0.030239924400188998, "grad_norm": 0.1211436465382576, "learning_rate": 2e-07, "loss": 0.031, "step": 324 }, { "clip_ratio/high_max": 0.0020055357745150104, "clip_ratio/high_mean": 0.0008711673071957193, "clip_ratio/low_mean": 0.0005298917421896476, "clip_ratio/low_min": 1.4243390978663228e-05, "clip_ratio/region_mean": 0.0014010590712132398, "epoch": 0.030333257500189583, "grad_norm": 0.13711117208003998, "learning_rate": 2e-07, "loss": -0.022, "step": 325 }, { "clip_ratio/high_max": 0.0018533126713009551, "clip_ratio/high_mean": 0.0007326084505621111, "clip_ratio/low_mean": 0.0006404056166502414, "clip_ratio/low_min": 5.2162255087750964e-05, "clip_ratio/region_mean": 0.001373014096316183, "epoch": 0.030426590600190165, "grad_norm": 0.11760517954826355, "learning_rate": 2e-07, "loss": 0.049, "step": 326 }, { "clip_ratio/high_max": 0.0018047416233457625, "clip_ratio/high_mean": 0.0007827441459085094, "clip_ratio/low_mean": 0.0005671492308465531, "clip_ratio/low_min": 3.614993238443276e-05, "clip_ratio/region_mean": 0.0013498934204108082, "epoch": 0.03051992370019075, "grad_norm": 0.11555701494216919, "learning_rate": 2e-07, "loss": 0.0177, "step": 327 }, { "clip_ratio/high_max": 0.0017075175055651926, "clip_ratio/high_mean": 0.0007701397462369641, "clip_ratio/low_mean": 0.0007354669214691967, "clip_ratio/low_min": 7.073225606291089e-05, "clip_ratio/region_mean": 0.0015056066513352562, "epoch": 0.030613256800191332, "grad_norm": 0.1212574914097786, "learning_rate": 2e-07, "loss": 0.0526, "step": 328 }, { "clip_ratio/high_max": 0.0020462254105950706, "clip_ratio/high_mean": 0.0009032924881466897, "clip_ratio/low_mean": 0.0005376549534048536, "clip_ratio/low_min": 5.068782320449827e-05, "clip_ratio/region_mean": 0.001440947424271144, "epoch": 0.030706589900191917, "grad_norm": 0.12562961876392365, "learning_rate": 2e-07, "loss": -0.0429, "step": 329 }, { "clip_ratio/high_max": 0.002181030620704405, "clip_ratio/high_mean": 0.000819687740658992, "clip_ratio/low_mean": 0.0006825181417298154, "clip_ratio/low_min": 5.209079790802207e-05, "clip_ratio/region_mean": 0.0015022058432805352, "epoch": 0.0307999230001925, "grad_norm": 0.12798555195331573, "learning_rate": 2e-07, "loss": 0.0583, "step": 330 }, { "clip_ratio/high_max": 0.0021890079624427017, "clip_ratio/high_mean": 0.0008083746370175504, "clip_ratio/low_mean": 0.0006119201716501266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001420294811396161, "epoch": 0.030893256100193084, "grad_norm": 0.13179506361484528, "learning_rate": 2e-07, "loss": 0.0636, "step": 331 }, { "clip_ratio/high_max": 0.0024998585577122867, "clip_ratio/high_mean": 0.0009136358130490407, "clip_ratio/low_mean": 0.0006552305185323348, "clip_ratio/low_min": 6.915092399140121e-05, "clip_ratio/region_mean": 0.0015688663406763226, "epoch": 0.030986589200193666, "grad_norm": 0.1430823802947998, "learning_rate": 2e-07, "loss": -0.0113, "step": 332 }, { "clip_ratio/high_max": 0.002172136850276729, "clip_ratio/high_mean": 0.0009052669865923235, "clip_ratio/low_mean": 0.0006395005293597933, "clip_ratio/low_min": 4.831981368624838e-05, "clip_ratio/region_mean": 0.0015447675323230214, "epoch": 0.03107992230019425, "grad_norm": 0.13516907393932343, "learning_rate": 2e-07, "loss": -0.0033, "step": 333 }, { "clip_ratio/high_max": 0.0021146919461898506, "clip_ratio/high_mean": 0.0008844261210469995, "clip_ratio/low_mean": 0.0006489717516160454, "clip_ratio/low_min": 5.190768752072472e-05, "clip_ratio/region_mean": 0.0015333978735725395, "epoch": 0.031173255400194833, "grad_norm": 0.13142113387584686, "learning_rate": 2e-07, "loss": 0.0429, "step": 334 }, { "clip_ratio/high_max": 0.0020546290834317915, "clip_ratio/high_mean": 0.0008162078083842061, "clip_ratio/low_mean": 0.0006236929057195084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014399007304746192, "epoch": 0.03126658850019542, "grad_norm": 0.13078740239143372, "learning_rate": 2e-07, "loss": 0.0355, "step": 335 }, { "clip_ratio/high_max": 0.0019496910244924948, "clip_ratio/high_mean": 0.0007712158467256813, "clip_ratio/low_mean": 0.0006502433552668663, "clip_ratio/low_min": 1.723840341583127e-05, "clip_ratio/region_mean": 0.001421459201083053, "epoch": 0.031359921600196, "grad_norm": 0.12030063569545746, "learning_rate": 2e-07, "loss": 0.0269, "step": 336 }, { "clip_ratio/high_max": 0.0020667543685704004, "clip_ratio/high_mean": 0.0008630554075352848, "clip_ratio/low_mean": 0.0006681880677206209, "clip_ratio/low_min": 4.004978291050065e-05, "clip_ratio/region_mean": 0.001531243422505213, "epoch": 0.03145325470019658, "grad_norm": 0.12424676865339279, "learning_rate": 2e-07, "loss": 0.0351, "step": 337 }, { "clip_ratio/high_max": 0.0019960505560447928, "clip_ratio/high_mean": 0.0008136813394230558, "clip_ratio/low_mean": 0.0006493733471870655, "clip_ratio/low_min": 2.502357165212743e-05, "clip_ratio/region_mean": 0.001463054730265867, "epoch": 0.031546587800197166, "grad_norm": 0.140573650598526, "learning_rate": 2e-07, "loss": 0.0039, "step": 338 }, { "clip_ratio/high_max": 0.0019912030111299828, "clip_ratio/high_mean": 0.0008389609683945309, "clip_ratio/low_mean": 0.0007777182163408725, "clip_ratio/low_min": 4.299430929677328e-05, "clip_ratio/region_mean": 0.001616679241124075, "epoch": 0.03163992090019775, "grad_norm": 0.12600019574165344, "learning_rate": 2e-07, "loss": 0.0752, "step": 339 }, { "clip_ratio/high_max": 0.00190631156146992, "clip_ratio/high_mean": 0.0006942794880160363, "clip_ratio/low_mean": 0.0006744509610143723, "clip_ratio/low_min": 2.434991802147124e-05, "clip_ratio/region_mean": 0.0013687304490304086, "epoch": 0.03173325400019833, "grad_norm": 0.10787362605333328, "learning_rate": 2e-07, "loss": 0.0604, "step": 340 }, { "clip_ratio/high_max": 0.0018717912207648624, "clip_ratio/high_mean": 0.0007432868496834999, "clip_ratio/low_mean": 0.0006416139949578792, "clip_ratio/low_min": 4.470630483410787e-05, "clip_ratio/region_mean": 0.0013849008792021777, "epoch": 0.031826587100198915, "grad_norm": 0.12344107776880264, "learning_rate": 2e-07, "loss": 0.0076, "step": 341 }, { "clip_ratio/high_max": 0.0018016068970609922, "clip_ratio/high_mean": 0.0008173807727871463, "clip_ratio/low_mean": 0.0006208045751918689, "clip_ratio/low_min": 6.176316765049705e-05, "clip_ratio/region_mean": 0.0014381853397935629, "epoch": 0.0319199202001995, "grad_norm": 0.12946541607379913, "learning_rate": 2e-07, "loss": 0.0364, "step": 342 }, { "clip_ratio/high_max": 0.00202441870351322, "clip_ratio/high_mean": 0.0008188404790416826, "clip_ratio/low_mean": 0.000643654088889889, "clip_ratio/low_min": 6.226074674486881e-05, "clip_ratio/region_mean": 0.0014624945470131934, "epoch": 0.032013253300200085, "grad_norm": 0.11710294336080551, "learning_rate": 2e-07, "loss": 0.0114, "step": 343 }, { "clip_ratio/high_max": 0.002013855533732567, "clip_ratio/high_mean": 0.0007497915321437176, "clip_ratio/low_mean": 0.00063042807050806, "clip_ratio/low_min": 0.00011657851246127393, "clip_ratio/region_mean": 0.0013802195790049154, "epoch": 0.032106586400200664, "grad_norm": 0.13373452425003052, "learning_rate": 2e-07, "loss": 0.0569, "step": 344 }, { "clip_ratio/high_max": 0.002242884671431966, "clip_ratio/high_mean": 0.0009183049169223523, "clip_ratio/low_mean": 0.0005564682596741477, "clip_ratio/low_min": 6.230210510693723e-05, "clip_ratio/region_mean": 0.0014747731875104364, "epoch": 0.03219991950020125, "grad_norm": 0.12011953443288803, "learning_rate": 2e-07, "loss": -0.0222, "step": 345 }, { "clip_ratio/high_max": 0.0020708401934825815, "clip_ratio/high_mean": 0.0009507345657766564, "clip_ratio/low_mean": 0.000621335919277044, "clip_ratio/low_min": 2.5207879843947012e-05, "clip_ratio/region_mean": 0.001572070483234711, "epoch": 0.032293252600201834, "grad_norm": 0.11543308198451996, "learning_rate": 2e-07, "loss": -0.0009, "step": 346 }, { "clip_ratio/high_max": 0.0023103153507690877, "clip_ratio/high_mean": 0.0008825328077364247, "clip_ratio/low_mean": 0.0006463153167715063, "clip_ratio/low_min": 5.598203915724298e-05, "clip_ratio/region_mean": 0.0015288480899471324, "epoch": 0.03238658570020242, "grad_norm": 0.11768263578414917, "learning_rate": 2e-07, "loss": -0.0143, "step": 347 }, { "clip_ratio/high_max": 0.002000804648560006, "clip_ratio/high_mean": 0.0007938522703625495, "clip_ratio/low_mean": 0.0005484688372234814, "clip_ratio/low_min": 4.504949993133778e-05, "clip_ratio/region_mean": 0.0013423211094050203, "epoch": 0.032479918800203, "grad_norm": 0.11947911232709885, "learning_rate": 2e-07, "loss": 0.0346, "step": 348 }, { "clip_ratio/high_max": 0.0020290339962230064, "clip_ratio/high_mean": 0.0007647377096873242, "clip_ratio/low_mean": 0.0006393402381945634, "clip_ratio/low_min": 5.114381019666325e-05, "clip_ratio/region_mean": 0.0014040779569768347, "epoch": 0.03257325190020358, "grad_norm": 0.11520770192146301, "learning_rate": 2e-07, "loss": 0.0267, "step": 349 }, { "clip_ratio/high_max": 0.0017773850668163504, "clip_ratio/high_mean": 0.0007189684774857596, "clip_ratio/low_mean": 0.0005938802150922129, "clip_ratio/low_min": 7.89358964539133e-05, "clip_ratio/region_mean": 0.0013128486643836368, "epoch": 0.03266658500020417, "grad_norm": 0.12005266547203064, "learning_rate": 2e-07, "loss": 0.0186, "step": 350 }, { "clip_ratio/high_max": 0.0018330966267967597, "clip_ratio/high_mean": 0.000767098650612752, "clip_ratio/low_mean": 0.0006052745466149645, "clip_ratio/low_min": 1.3572203897638246e-05, "clip_ratio/region_mean": 0.00137237318631378, "epoch": 0.032759918100204746, "grad_norm": 0.14219240844249725, "learning_rate": 2e-07, "loss": 0.0296, "step": 351 }, { "clip_ratio/high_max": 0.0017666100829956122, "clip_ratio/high_mean": 0.000725057820091024, "clip_ratio/low_mean": 0.0006462834753619973, "clip_ratio/low_min": 4.317881075621699e-05, "clip_ratio/region_mean": 0.0013713412663491908, "epoch": 0.03285325120020533, "grad_norm": 0.10977889597415924, "learning_rate": 2e-07, "loss": 0.0229, "step": 352 }, { "clip_ratio/high_max": 0.002389262012002291, "clip_ratio/high_mean": 0.0009287488846894121, "clip_ratio/low_mean": 0.0005842693053637049, "clip_ratio/low_min": 2.9629880373249762e-05, "clip_ratio/region_mean": 0.0015130181636777706, "epoch": 0.03294658430020592, "grad_norm": 0.11733007431030273, "learning_rate": 2e-07, "loss": 0.0063, "step": 353 }, { "clip_ratio/high_max": 0.001907180507259909, "clip_ratio/high_mean": 0.0008123285824694904, "clip_ratio/low_mean": 0.0005535774234886048, "clip_ratio/low_min": 2.4707741431484465e-05, "clip_ratio/region_mean": 0.0013659059986821376, "epoch": 0.0330399174002065, "grad_norm": 0.11988719552755356, "learning_rate": 2e-07, "loss": -0.0011, "step": 354 }, { "clip_ratio/high_max": 0.002105914223648142, "clip_ratio/high_mean": 0.0008266113309218781, "clip_ratio/low_mean": 0.0006236892741071642, "clip_ratio/low_min": 7.565508985862834e-05, "clip_ratio/region_mean": 0.0014503005950246006, "epoch": 0.03313325050020708, "grad_norm": 0.13023196160793304, "learning_rate": 2e-07, "loss": 0.0471, "step": 355 }, { "clip_ratio/high_max": 0.0018886301113525406, "clip_ratio/high_mean": 0.0007371505671471823, "clip_ratio/low_mean": 0.0006209818593561067, "clip_ratio/low_min": 5.636145715470775e-05, "clip_ratio/region_mean": 0.0013581323983089533, "epoch": 0.033226583600207665, "grad_norm": 0.12149716168642044, "learning_rate": 2e-07, "loss": 0.0286, "step": 356 }, { "clip_ratio/high_max": 0.0019131158514937852, "clip_ratio/high_mean": 0.0007662280495424056, "clip_ratio/low_mean": 0.000597304282564437, "clip_ratio/low_min": 8.841419912641868e-06, "clip_ratio/region_mean": 0.0013635323193739168, "epoch": 0.03331991670020825, "grad_norm": 0.12149664014577866, "learning_rate": 2e-07, "loss": 0.0352, "step": 357 }, { "clip_ratio/high_max": 0.0020080751564819366, "clip_ratio/high_mean": 0.0007916517643025145, "clip_ratio/low_mean": 0.0006037095026840689, "clip_ratio/low_min": 1.8100608031090815e-05, "clip_ratio/region_mean": 0.0013953612506156787, "epoch": 0.033413249800208836, "grad_norm": 0.11034026741981506, "learning_rate": 2e-07, "loss": 0.0381, "step": 358 }, { "clip_ratio/high_max": 0.002269225449708756, "clip_ratio/high_mean": 0.0009011512047436554, "clip_ratio/low_mean": 0.0005662503208441194, "clip_ratio/low_min": 4.208002246741671e-05, "clip_ratio/region_mean": 0.0014674015510536265, "epoch": 0.033506582900209414, "grad_norm": 0.12639440596103668, "learning_rate": 2e-07, "loss": -0.0078, "step": 359 }, { "clip_ratio/high_max": 0.002129264823452104, "clip_ratio/high_mean": 0.0008123151183099253, "clip_ratio/low_mean": 0.000670363697281573, "clip_ratio/low_min": 1.3682136341230944e-05, "clip_ratio/region_mean": 0.0014826787883066572, "epoch": 0.03359991600021, "grad_norm": 0.12230025231838226, "learning_rate": 2e-07, "loss": 0.0259, "step": 360 }, { "clip_ratio/high_max": 0.001760990751790814, "clip_ratio/high_mean": 0.0008231172523665009, "clip_ratio/low_mean": 0.0006312998539215187, "clip_ratio/low_min": 6.47276833660726e-05, "clip_ratio/region_mean": 0.001454417080822168, "epoch": 0.033693249100210584, "grad_norm": 0.12162677198648453, "learning_rate": 2e-07, "loss": 0.0564, "step": 361 }, { "clip_ratio/high_max": 0.002056452227407135, "clip_ratio/high_mean": 0.0008314186634379439, "clip_ratio/low_mean": 0.0006108053003117675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001442223943740828, "epoch": 0.03378658220021117, "grad_norm": 0.12398241460323334, "learning_rate": 2e-07, "loss": 0.021, "step": 362 }, { "clip_ratio/high_max": 0.0020690746678155847, "clip_ratio/high_mean": 0.0008090111314231763, "clip_ratio/low_mean": 0.0007044975100143347, "clip_ratio/low_min": 0.00014991661100793863, "clip_ratio/region_mean": 0.0015135086287045851, "epoch": 0.03387991530021175, "grad_norm": 0.14264896512031555, "learning_rate": 2e-07, "loss": 0.0604, "step": 363 }, { "clip_ratio/high_max": 0.002066791814286262, "clip_ratio/high_mean": 0.0008508820974384435, "clip_ratio/low_mean": 0.0006410474834410707, "clip_ratio/low_min": 4.581278335535899e-05, "clip_ratio/region_mean": 0.0014919295645086095, "epoch": 0.03397324840021233, "grad_norm": 0.1389528065919876, "learning_rate": 2e-07, "loss": 0.0173, "step": 364 }, { "clip_ratio/high_max": 0.0020607311598723754, "clip_ratio/high_mean": 0.0008336198898177827, "clip_ratio/low_mean": 0.0006827998149674386, "clip_ratio/low_min": 3.025066371264984e-05, "clip_ratio/region_mean": 0.001516419662948465, "epoch": 0.03406658150021292, "grad_norm": 0.13245995342731476, "learning_rate": 2e-07, "loss": 0.0293, "step": 365 }, { "clip_ratio/high_max": 0.0016272594257316086, "clip_ratio/high_mean": 0.0006732027550242492, "clip_ratio/low_mean": 0.0007372142172243912, "clip_ratio/low_min": 0.00017340998419967946, "clip_ratio/region_mean": 0.001410416945873294, "epoch": 0.034159914600213497, "grad_norm": 0.1343669593334198, "learning_rate": 2e-07, "loss": 0.0843, "step": 366 }, { "clip_ratio/high_max": 0.0018048632009595167, "clip_ratio/high_mean": 0.0007258457371790428, "clip_ratio/low_mean": 0.0006483133802248631, "clip_ratio/low_min": 2.721192686294671e-05, "clip_ratio/region_mean": 0.0013741591101279482, "epoch": 0.03425324770021408, "grad_norm": 0.10857783257961273, "learning_rate": 2e-07, "loss": 0.0613, "step": 367 }, { "clip_ratio/high_max": 0.001979690645384835, "clip_ratio/high_mean": 0.0007917734492366435, "clip_ratio/low_mean": 0.0005602649962384021, "clip_ratio/low_min": 3.9397029468091205e-05, "clip_ratio/region_mean": 0.0013520384127332363, "epoch": 0.03434658080021467, "grad_norm": 0.12216323614120483, "learning_rate": 2e-07, "loss": 0.0275, "step": 368 }, { "clip_ratio/high_max": 0.0019470178194751497, "clip_ratio/high_mean": 0.0007820853443263331, "clip_ratio/low_mean": 0.0006729179413014208, "clip_ratio/low_min": 8.100918239506427e-05, "clip_ratio/region_mean": 0.001455003279261291, "epoch": 0.03443991390021525, "grad_norm": 0.1256723552942276, "learning_rate": 2e-07, "loss": 0.0141, "step": 369 }, { "clip_ratio/high_max": 0.0020400792091095354, "clip_ratio/high_mean": 0.000811651443655137, "clip_ratio/low_mean": 0.0007086436889949255, "clip_ratio/low_min": 6.742117057001451e-05, "clip_ratio/region_mean": 0.0015202951472019777, "epoch": 0.03453324700021583, "grad_norm": 0.12908712029457092, "learning_rate": 2e-07, "loss": 0.0543, "step": 370 }, { "clip_ratio/high_max": 0.0021758182047051378, "clip_ratio/high_mean": 0.0008371064213861246, "clip_ratio/low_mean": 0.0006683509545837296, "clip_ratio/low_min": 3.573279445845401e-05, "clip_ratio/region_mean": 0.001505457370512886, "epoch": 0.034626580100216416, "grad_norm": 0.12797218561172485, "learning_rate": 2e-07, "loss": 0.0561, "step": 371 }, { "clip_ratio/high_max": 0.0016280414820357691, "clip_ratio/high_mean": 0.0006990388064878061, "clip_ratio/low_mean": 0.0007220758470793953, "clip_ratio/low_min": 5.1524481023079716e-05, "clip_ratio/region_mean": 0.0014211146954039577, "epoch": 0.034719913200217, "grad_norm": 0.11436526477336884, "learning_rate": 2e-07, "loss": 0.0401, "step": 372 }, { "clip_ratio/high_max": 0.0019686389787239023, "clip_ratio/high_mean": 0.0008020481036510319, "clip_ratio/low_mean": 0.0006287805044848938, "clip_ratio/low_min": 5.300663633533986e-05, "clip_ratio/region_mean": 0.0014308286226878408, "epoch": 0.034813246300217586, "grad_norm": 0.12753590941429138, "learning_rate": 2e-07, "loss": 0.0134, "step": 373 }, { "clip_ratio/high_max": 0.00219043673860142, "clip_ratio/high_mean": 0.0009402333143953001, "clip_ratio/low_mean": 0.0006243069046831806, "clip_ratio/low_min": 5.383817006077152e-05, "clip_ratio/region_mean": 0.001564540227263933, "epoch": 0.034906579400218164, "grad_norm": 0.1635570526123047, "learning_rate": 2e-07, "loss": 0.0325, "step": 374 }, { "clip_ratio/high_max": 0.0019958456105086952, "clip_ratio/high_mean": 0.0008614963007858023, "clip_ratio/low_mean": 0.0006324863716145046, "clip_ratio/low_min": 1.5405472368001938e-05, "clip_ratio/region_mean": 0.0014939826942281798, "epoch": 0.03499991250021875, "grad_norm": 0.11014176160097122, "learning_rate": 2e-07, "loss": 0.0061, "step": 375 }, { "clip_ratio/high_max": 0.001989554257306736, "clip_ratio/high_mean": 0.0008578917222621385, "clip_ratio/low_mean": 0.000605685790105781, "clip_ratio/low_min": 4.614793988366728e-05, "clip_ratio/region_mean": 0.0014635774787166156, "epoch": 0.035093245600219335, "grad_norm": 0.1236497312784195, "learning_rate": 2e-07, "loss": 0.0505, "step": 376 }, { "clip_ratio/high_max": 0.001994188980461331, "clip_ratio/high_mean": 0.0008141411526594311, "clip_ratio/low_mean": 0.000618780763943505, "clip_ratio/low_min": 1.2005378266621847e-05, "clip_ratio/region_mean": 0.0014329219120554626, "epoch": 0.03518657870021991, "grad_norm": 0.11784907430410385, "learning_rate": 2e-07, "loss": 0.0392, "step": 377 }, { "clip_ratio/high_max": 0.0019694275615620427, "clip_ratio/high_mean": 0.00090624990662036, "clip_ratio/low_mean": 0.000524325178048457, "clip_ratio/low_min": 4.2793630200321786e-05, "clip_ratio/region_mean": 0.0014305751174106263, "epoch": 0.0352799118002205, "grad_norm": 0.11589766293764114, "learning_rate": 2e-07, "loss": -0.0072, "step": 378 }, { "clip_ratio/high_max": 0.0020863029421889223, "clip_ratio/high_mean": 0.0008276191092591034, "clip_ratio/low_mean": 0.0006072749047234538, "clip_ratio/low_min": 2.4620095700811362e-05, "clip_ratio/region_mean": 0.0014348940203490201, "epoch": 0.03537324490022108, "grad_norm": 0.12050627171993256, "learning_rate": 2e-07, "loss": 0.0111, "step": 379 }, { "clip_ratio/high_max": 0.0018348816483921837, "clip_ratio/high_mean": 0.0008027445455809357, "clip_ratio/low_mean": 0.0006470960997830844, "clip_ratio/low_min": 9.941148164216429e-06, "clip_ratio/region_mean": 0.0014498406380880624, "epoch": 0.03546657800022167, "grad_norm": 0.11710581183433533, "learning_rate": 2e-07, "loss": 0.0451, "step": 380 }, { "clip_ratio/high_max": 0.0018553924019215629, "clip_ratio/high_mean": 0.0007251129045471316, "clip_ratio/low_mean": 0.0006450925693570753, "clip_ratio/low_min": 2.951593887701165e-05, "clip_ratio/region_mean": 0.0013702054748137016, "epoch": 0.03555991110022225, "grad_norm": 0.1148415207862854, "learning_rate": 2e-07, "loss": 0.0364, "step": 381 }, { "clip_ratio/high_max": 0.002188382735766936, "clip_ratio/high_mean": 0.0007418805289489683, "clip_ratio/low_mean": 0.0006719032444379991, "clip_ratio/low_min": 8.273737421404803e-05, "clip_ratio/region_mean": 0.001413783771567978, "epoch": 0.03565324420022283, "grad_norm": 0.11965974420309067, "learning_rate": 2e-07, "loss": 0.0286, "step": 382 }, { "clip_ratio/high_max": 0.0022877613118907902, "clip_ratio/high_mean": 0.0008502288783347467, "clip_ratio/low_mean": 0.0006915737740200711, "clip_ratio/low_min": 6.654474600509275e-05, "clip_ratio/region_mean": 0.0015418026559927966, "epoch": 0.03574657730022342, "grad_norm": 0.12980860471725464, "learning_rate": 2e-07, "loss": 0.0482, "step": 383 }, { "clip_ratio/high_max": 0.001956745942152338, "clip_ratio/high_mean": 0.0007399788755719783, "clip_ratio/low_mean": 0.0006785868736187695, "clip_ratio/low_min": 1.5672016161261126e-05, "clip_ratio/region_mean": 0.0014185657564667054, "epoch": 0.035839910400224, "grad_norm": 0.11257439106702805, "learning_rate": 2e-07, "loss": 0.0451, "step": 384 }, { "clip_ratio/high_max": 0.001635877739317948, "clip_ratio/high_mean": 0.0007231750005303184, "clip_ratio/low_mean": 0.000614899397987756, "clip_ratio/low_min": 9.094878168980358e-05, "clip_ratio/region_mean": 0.0013380743876041379, "completions/clipped_ratio": 0.0156947544642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 632.9285888671875, "completions/mean_terminated_length": 577.7098388671875, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.03593324350022458, "grad_norm": 0.11997140198945999, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 329261056.0, "reward": 0.5730765461921692, "reward_std": 0.1984192281961441, "rewards/simpleverify_reward/mean": 0.5730765461921692, "rewards/simpleverify_reward/std": 0.49463316798210144, "step": 385 }, { "clip_ratio/high_max": 0.0018876111207646318, "clip_ratio/high_mean": 0.000837262807181105, "clip_ratio/low_mean": 0.0005572338650381425, "clip_ratio/low_min": 2.6356775379099417e-05, "clip_ratio/region_mean": 0.0013944966776762158, "epoch": 0.036026576600225166, "grad_norm": 0.12326093018054962, "learning_rate": 2e-07, "loss": 0.0043, "step": 386 }, { "clip_ratio/high_max": 0.0017761959315976128, "clip_ratio/high_mean": 0.00077988690645725, "clip_ratio/low_mean": 0.0005185645404708339, "clip_ratio/low_min": 7.549017573182937e-05, "clip_ratio/region_mean": 0.0012984514578420203, "epoch": 0.03611990970022575, "grad_norm": 0.11433762311935425, "learning_rate": 2e-07, "loss": 0.0206, "step": 387 }, { "clip_ratio/high_max": 0.0018871435531764291, "clip_ratio/high_mean": 0.0007079850911395624, "clip_ratio/low_mean": 0.0006058954168111086, "clip_ratio/low_min": 2.947328903246671e-05, "clip_ratio/region_mean": 0.001313880507950671, "epoch": 0.03621324280022633, "grad_norm": 0.12256833165884018, "learning_rate": 2e-07, "loss": 0.0531, "step": 388 }, { "clip_ratio/high_max": 0.0015593749121762812, "clip_ratio/high_mean": 0.0006488196850114036, "clip_ratio/low_mean": 0.0006126592525106389, "clip_ratio/low_min": 4.342332704254659e-05, "clip_ratio/region_mean": 0.0012614789739018306, "epoch": 0.036306575900226914, "grad_norm": 0.11836711317300797, "learning_rate": 2e-07, "loss": 0.0541, "step": 389 }, { "clip_ratio/high_max": 0.0019476094930723775, "clip_ratio/high_mean": 0.0007355644811468665, "clip_ratio/low_mean": 0.0005683511153620202, "clip_ratio/low_min": 1.9876078113156836e-05, "clip_ratio/region_mean": 0.001303915589232929, "epoch": 0.0363999090002275, "grad_norm": 0.11679317057132721, "learning_rate": 2e-07, "loss": 0.0444, "step": 390 }, { "clip_ratio/high_max": 0.0021670229361916427, "clip_ratio/high_mean": 0.0008395259283133782, "clip_ratio/low_mean": 0.0005811416976939654, "clip_ratio/low_min": 1.2460127436497714e-05, "clip_ratio/region_mean": 0.0014206675768946297, "epoch": 0.036493242100228085, "grad_norm": 0.12928101420402527, "learning_rate": 2e-07, "loss": 0.0187, "step": 391 }, { "clip_ratio/high_max": 0.0018397084531898145, "clip_ratio/high_mean": 0.0007999890422070166, "clip_ratio/low_mean": 0.0005516105229617096, "clip_ratio/low_min": 1.4873869986331556e-05, "clip_ratio/region_mean": 0.0013515995560737792, "epoch": 0.03658657520022866, "grad_norm": 0.12298496067523956, "learning_rate": 2e-07, "loss": 0.0206, "step": 392 }, { "clip_ratio/high_max": 0.0020783258150913753, "clip_ratio/high_mean": 0.0008299843302665977, "clip_ratio/low_mean": 0.0004634065562640899, "clip_ratio/low_min": 7.659314178454224e-06, "clip_ratio/region_mean": 0.0012933908692502882, "epoch": 0.03667990830022925, "grad_norm": 0.1105446144938469, "learning_rate": 2e-07, "loss": 0.032, "step": 393 }, { "clip_ratio/high_max": 0.00172527633185382, "clip_ratio/high_mean": 0.0006747788793290965, "clip_ratio/low_mean": 0.0005321496355463751, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012069285476172809, "epoch": 0.036773241400229834, "grad_norm": 0.11346527189016342, "learning_rate": 2e-07, "loss": 0.0242, "step": 394 }, { "clip_ratio/high_max": 0.0018345289754506666, "clip_ratio/high_mean": 0.0007844047468097415, "clip_ratio/low_mean": 0.00048737414454080863, "clip_ratio/low_min": 2.3439461983798537e-05, "clip_ratio/region_mean": 0.0012717788813461084, "epoch": 0.03686657450023042, "grad_norm": 0.11031324416399002, "learning_rate": 2e-07, "loss": 0.0234, "step": 395 }, { "clip_ratio/high_max": 0.0018091752281179652, "clip_ratio/high_mean": 0.0007829425576346694, "clip_ratio/low_mean": 0.0005732427343900781, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013561852865677793, "epoch": 0.036959907600231, "grad_norm": 0.119163878262043, "learning_rate": 2e-07, "loss": 0.0337, "step": 396 }, { "clip_ratio/high_max": 0.001953605533344671, "clip_ratio/high_mean": 0.0008116526842059102, "clip_ratio/low_mean": 0.0005036395250499481, "clip_ratio/low_min": 2.3932848307595123e-05, "clip_ratio/region_mean": 0.0013152922001609113, "epoch": 0.03705324070023158, "grad_norm": 0.12695623934268951, "learning_rate": 2e-07, "loss": -0.0113, "step": 397 }, { "clip_ratio/high_max": 0.0017585911191417836, "clip_ratio/high_mean": 0.0007527170328103239, "clip_ratio/low_mean": 0.0005569904315052554, "clip_ratio/low_min": 7.520036706409883e-05, "clip_ratio/region_mean": 0.001309707480686484, "epoch": 0.03714657380023217, "grad_norm": 0.11844083666801453, "learning_rate": 2e-07, "loss": 0.0252, "step": 398 }, { "clip_ratio/high_max": 0.0017968105894397013, "clip_ratio/high_mean": 0.0006859022851131158, "clip_ratio/low_mean": 0.0005486042391567025, "clip_ratio/low_min": 2.9215374524937943e-05, "clip_ratio/region_mean": 0.0012345065370027442, "epoch": 0.03723990690023275, "grad_norm": 0.10583288222551346, "learning_rate": 2e-07, "loss": 0.0466, "step": 399 }, { "clip_ratio/high_max": 0.0019447361191851087, "clip_ratio/high_mean": 0.000772352710555424, "clip_ratio/low_mean": 0.0005394804793468211, "clip_ratio/low_min": 8.99151200428605e-06, "clip_ratio/region_mean": 0.0013118331917212345, "epoch": 0.03733324000023333, "grad_norm": 0.13289691507816315, "learning_rate": 2e-07, "loss": 0.0227, "step": 400 }, { "clip_ratio/high_max": 0.001714615078526549, "clip_ratio/high_mean": 0.0007293125145224622, "clip_ratio/low_mean": 0.0006347885318973567, "clip_ratio/low_min": 3.9188170376291964e-05, "clip_ratio/region_mean": 0.0013641010191349778, "epoch": 0.037426573100233916, "grad_norm": 0.13370080292224884, "learning_rate": 2e-07, "loss": 0.0714, "step": 401 }, { "clip_ratio/high_max": 0.0021376897420850582, "clip_ratio/high_mean": 0.0007666979872738011, "clip_ratio/low_mean": 0.0006158499963930808, "clip_ratio/low_min": 6.278149157878943e-05, "clip_ratio/region_mean": 0.001382547983666882, "epoch": 0.0375199062002345, "grad_norm": 0.11720848828554153, "learning_rate": 2e-07, "loss": 0.0251, "step": 402 }, { "clip_ratio/high_max": 0.002159803800168447, "clip_ratio/high_mean": 0.0008087530459306436, "clip_ratio/low_mean": 0.0005690839152521221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013778369793726597, "epoch": 0.03761323930023508, "grad_norm": 0.12078811228275299, "learning_rate": 2e-07, "loss": -0.0064, "step": 403 }, { "clip_ratio/high_max": 0.0018039014466921799, "clip_ratio/high_mean": 0.0006723776714352425, "clip_ratio/low_mean": 0.0005232569683357724, "clip_ratio/low_min": 3.764524899452226e-05, "clip_ratio/region_mean": 0.0011956346570514143, "epoch": 0.037706572400235665, "grad_norm": 0.10372593253850937, "learning_rate": 2e-07, "loss": 0.0313, "step": 404 }, { "clip_ratio/high_max": 0.0018867071048589423, "clip_ratio/high_mean": 0.0007387079403997632, "clip_ratio/low_mean": 0.0005530236103368225, "clip_ratio/low_min": 5.546703960135346e-05, "clip_ratio/region_mean": 0.001291731583478395, "epoch": 0.03779990550023625, "grad_norm": 0.11051561683416367, "learning_rate": 2e-07, "loss": -0.0101, "step": 405 }, { "clip_ratio/high_max": 0.0021502488598343916, "clip_ratio/high_mean": 0.0008747991450945847, "clip_ratio/low_mean": 0.0005174204534341698, "clip_ratio/low_min": 2.7098690225102473e-05, "clip_ratio/region_mean": 0.0013922196158091538, "epoch": 0.037893238600236835, "grad_norm": 0.11935614794492722, "learning_rate": 2e-07, "loss": -0.007, "step": 406 }, { "clip_ratio/high_max": 0.001992880235775374, "clip_ratio/high_mean": 0.0007544078580394853, "clip_ratio/low_mean": 0.0006092137364248629, "clip_ratio/low_min": 5.675428292306606e-05, "clip_ratio/region_mean": 0.0013636215808219276, "epoch": 0.03798657170023741, "grad_norm": 0.10517723113298416, "learning_rate": 2e-07, "loss": 0.0558, "step": 407 }, { "clip_ratio/high_max": 0.0019211721009924076, "clip_ratio/high_mean": 0.0008471281707898015, "clip_ratio/low_mean": 0.0005720361732528545, "clip_ratio/low_min": 5.742740813730052e-05, "clip_ratio/region_mean": 0.0014191643422236666, "epoch": 0.038079904800238, "grad_norm": 0.1262596845626831, "learning_rate": 2e-07, "loss": 0.0383, "step": 408 }, { "clip_ratio/high_max": 0.0016142095810209867, "clip_ratio/high_mean": 0.0006396558183041634, "clip_ratio/low_mean": 0.0006194958350533852, "clip_ratio/low_min": 8.077622715063626e-05, "clip_ratio/region_mean": 0.0012591516679094639, "epoch": 0.038173237900238584, "grad_norm": 0.11597229540348053, "learning_rate": 2e-07, "loss": 0.0736, "step": 409 }, { "clip_ratio/high_max": 0.00210810285716434, "clip_ratio/high_mean": 0.0008145096126099816, "clip_ratio/low_mean": 0.0005393694573285757, "clip_ratio/low_min": 1.512834114691941e-05, "clip_ratio/region_mean": 0.001353879109956324, "epoch": 0.03826657100023917, "grad_norm": 0.1308620125055313, "learning_rate": 2e-07, "loss": -0.0094, "step": 410 }, { "clip_ratio/high_max": 0.0018993276898982003, "clip_ratio/high_mean": 0.0007938734706840478, "clip_ratio/low_mean": 0.0006028143270668807, "clip_ratio/low_min": 7.225111221487168e-05, "clip_ratio/region_mean": 0.0013966878068458755, "epoch": 0.03835990410023975, "grad_norm": 0.1226753443479538, "learning_rate": 2e-07, "loss": -0.0169, "step": 411 }, { "clip_ratio/high_max": 0.0016973646415863186, "clip_ratio/high_mean": 0.0007062099475660943, "clip_ratio/low_mean": 0.0005961841598036699, "clip_ratio/low_min": 2.399502136540832e-05, "clip_ratio/region_mean": 0.0013023941064602695, "epoch": 0.03845323720024033, "grad_norm": 0.1270218789577484, "learning_rate": 2e-07, "loss": -0.0047, "step": 412 }, { "clip_ratio/high_max": 0.0015025190659798682, "clip_ratio/high_mean": 0.0006012071989971446, "clip_ratio/low_mean": 0.0005003771575502469, "clip_ratio/low_min": 7.64181895647198e-05, "clip_ratio/region_mean": 0.0011015843447239604, "epoch": 0.03854657030024092, "grad_norm": 0.10597111284732819, "learning_rate": 2e-07, "loss": 0.0363, "step": 413 }, { "clip_ratio/high_max": 0.001914199863676913, "clip_ratio/high_mean": 0.000794652392869466, "clip_ratio/low_mean": 0.0006189051773617393, "clip_ratio/low_min": 2.7015345040126704e-05, "clip_ratio/region_mean": 0.0014135575402178802, "epoch": 0.038639903400241496, "grad_norm": 0.10367751866579056, "learning_rate": 2e-07, "loss": -0.0202, "step": 414 }, { "clip_ratio/high_max": 0.001779600846930407, "clip_ratio/high_mean": 0.0007130969661375275, "clip_ratio/low_mean": 0.0005086125875095604, "clip_ratio/low_min": 5.81720041736844e-06, "clip_ratio/region_mean": 0.0012217095281812362, "epoch": 0.03873323650024208, "grad_norm": 0.11533833295106888, "learning_rate": 2e-07, "loss": 0.0465, "step": 415 }, { "clip_ratio/high_max": 0.0016242836391029414, "clip_ratio/high_mean": 0.0007737649066257291, "clip_ratio/low_mean": 0.0005529297131943167, "clip_ratio/low_min": 6.7501891862775665e-06, "clip_ratio/region_mean": 0.0013266946043586358, "epoch": 0.038826569600242666, "grad_norm": 0.10784076899290085, "learning_rate": 2e-07, "loss": -0.0102, "step": 416 }, { "clip_ratio/high_max": 0.0018035570028587244, "clip_ratio/high_mean": 0.0006998252010816941, "clip_ratio/low_mean": 0.0006647147238254547, "clip_ratio/low_min": 5.3284436489775544e-05, "clip_ratio/region_mean": 0.0013645399267261382, "epoch": 0.03891990270024325, "grad_norm": 0.11329121887683868, "learning_rate": 2e-07, "loss": 0.0544, "step": 417 }, { "clip_ratio/high_max": 0.0017844442882051226, "clip_ratio/high_mean": 0.0007359397441177862, "clip_ratio/low_mean": 0.0004905389296254725, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012264786892046686, "epoch": 0.03901323580024383, "grad_norm": 0.11630301177501678, "learning_rate": 2e-07, "loss": -0.0011, "step": 418 }, { "clip_ratio/high_max": 0.0016670763689035084, "clip_ratio/high_mean": 0.0006667568459306494, "clip_ratio/low_mean": 0.0005660979950334877, "clip_ratio/low_min": 1.1857332538056653e-05, "clip_ratio/region_mean": 0.0012328548473306, "epoch": 0.039106568900244415, "grad_norm": 0.11150956153869629, "learning_rate": 2e-07, "loss": 0.0417, "step": 419 }, { "clip_ratio/high_max": 0.001763537966326112, "clip_ratio/high_mean": 0.0006895848982821917, "clip_ratio/low_mean": 0.0005921732745264308, "clip_ratio/low_min": 6.7146538640372455e-06, "clip_ratio/region_mean": 0.0012817582064599264, "epoch": 0.039199902000245, "grad_norm": 0.12021953612565994, "learning_rate": 2e-07, "loss": 0.0052, "step": 420 }, { "clip_ratio/high_max": 0.00216739912139019, "clip_ratio/high_mean": 0.0008490982108924072, "clip_ratio/low_mean": 0.0006336785463645356, "clip_ratio/low_min": 7.203700442914851e-05, "clip_ratio/region_mean": 0.0014827767517999746, "epoch": 0.039293235100245585, "grad_norm": 0.128702774643898, "learning_rate": 2e-07, "loss": 0.0316, "step": 421 }, { "clip_ratio/high_max": 0.001965448034752626, "clip_ratio/high_mean": 0.0007909856703918194, "clip_ratio/low_mean": 0.0006021543667884544, "clip_ratio/low_min": 4.835252730117645e-05, "clip_ratio/region_mean": 0.0013931400353612844, "epoch": 0.039386568200246164, "grad_norm": 0.12577465176582336, "learning_rate": 2e-07, "loss": 0.0147, "step": 422 }, { "clip_ratio/high_max": 0.00230452606047038, "clip_ratio/high_mean": 0.000896001100045396, "clip_ratio/low_mean": 0.0006176184015203035, "clip_ratio/low_min": 5.063801654614508e-05, "clip_ratio/region_mean": 0.0015136195033846889, "epoch": 0.03947990130024675, "grad_norm": 0.12363224476575851, "learning_rate": 2e-07, "loss": -0.006, "step": 423 }, { "clip_ratio/high_max": 0.0017180107897729613, "clip_ratio/high_mean": 0.0006837930686742766, "clip_ratio/low_mean": 0.0005640370090986835, "clip_ratio/low_min": 4.2053748984471895e-05, "clip_ratio/region_mean": 0.0012478301068767905, "epoch": 0.039573234400247334, "grad_norm": 0.12319812923669815, "learning_rate": 2e-07, "loss": 0.0538, "step": 424 }, { "clip_ratio/high_max": 0.001999053834879305, "clip_ratio/high_mean": 0.000756128078137408, "clip_ratio/low_mean": 0.0005654994674841873, "clip_ratio/low_min": 7.502082371502183e-05, "clip_ratio/region_mean": 0.0013216275401646271, "epoch": 0.03966656750024792, "grad_norm": 0.11434979736804962, "learning_rate": 2e-07, "loss": 0.08, "step": 425 }, { "clip_ratio/high_max": 0.0016790965091786347, "clip_ratio/high_mean": 0.0007002053353062365, "clip_ratio/low_mean": 0.0006208872709976276, "clip_ratio/low_min": 4.2947947804350406e-05, "clip_ratio/region_mean": 0.0013210925899329595, "epoch": 0.0397599006002485, "grad_norm": 0.12547720968723297, "learning_rate": 2e-07, "loss": 0.0344, "step": 426 }, { "clip_ratio/high_max": 0.0018979049782501534, "clip_ratio/high_mean": 0.0007053877670841757, "clip_ratio/low_mean": 0.0006482241024059476, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013536118967749644, "epoch": 0.03985323370024908, "grad_norm": 0.11440374702215195, "learning_rate": 2e-07, "loss": 0.0433, "step": 427 }, { "clip_ratio/high_max": 0.0014545097728841938, "clip_ratio/high_mean": 0.0006040916487108916, "clip_ratio/low_mean": 0.0006497247122752015, "clip_ratio/low_min": 1.0072521945403423e-05, "clip_ratio/region_mean": 0.001253816360986093, "epoch": 0.03994656680024967, "grad_norm": 0.1268068253993988, "learning_rate": 2e-07, "loss": 0.076, "step": 428 }, { "clip_ratio/high_max": 0.001509308163804235, "clip_ratio/high_mean": 0.0005857292799191782, "clip_ratio/low_mean": 0.0006329973984975368, "clip_ratio/low_min": 1.8884697055909783e-05, "clip_ratio/region_mean": 0.0012187266547698528, "epoch": 0.040039899900250246, "grad_norm": 0.11423606425523758, "learning_rate": 2e-07, "loss": 0.0533, "step": 429 }, { "clip_ratio/high_max": 0.0019502655923133716, "clip_ratio/high_mean": 0.0008069239993346855, "clip_ratio/low_mean": 0.0005672048828273546, "clip_ratio/low_min": 1.593168417457491e-05, "clip_ratio/region_mean": 0.0013741288748860825, "epoch": 0.04013323300025083, "grad_norm": 0.12582477927207947, "learning_rate": 2e-07, "loss": 0.006, "step": 430 }, { "clip_ratio/high_max": 0.0017096993869927246, "clip_ratio/high_mean": 0.0007167305338953156, "clip_ratio/low_mean": 0.0006739218351867748, "clip_ratio/low_min": 0.00015516630355705274, "clip_ratio/region_mean": 0.0013906523527111858, "epoch": 0.04022656610025142, "grad_norm": 0.12487882375717163, "learning_rate": 2e-07, "loss": 0.0914, "step": 431 }, { "clip_ratio/high_max": 0.0019112973131996114, "clip_ratio/high_mean": 0.0007482011678803246, "clip_ratio/low_mean": 0.0005619742469207267, "clip_ratio/low_min": 2.5127331355179194e-05, "clip_ratio/region_mean": 0.0013101754084345885, "epoch": 0.040319899200252, "grad_norm": 0.1234482005238533, "learning_rate": 2e-07, "loss": 0.0304, "step": 432 }, { "clip_ratio/high_max": 0.00199937034631148, "clip_ratio/high_mean": 0.0008280788570118602, "clip_ratio/low_mean": 0.0005384793967095902, "clip_ratio/low_min": 1.2583048373926431e-05, "clip_ratio/region_mean": 0.0013665582591784187, "epoch": 0.04041323230025258, "grad_norm": 0.11249708384275436, "learning_rate": 2e-07, "loss": 0.0107, "step": 433 }, { "clip_ratio/high_max": 0.0019216128148400458, "clip_ratio/high_mean": 0.000743208044241328, "clip_ratio/low_mean": 0.0005600183617389121, "clip_ratio/low_min": 4.955810436513275e-05, "clip_ratio/region_mean": 0.0013032264250796288, "epoch": 0.040506565400253165, "grad_norm": 0.1126217320561409, "learning_rate": 2e-07, "loss": 0.0311, "step": 434 }, { "clip_ratio/high_max": 0.0017897943944262806, "clip_ratio/high_mean": 0.0007111694994819118, "clip_ratio/low_mean": 0.0005982841976219788, "clip_ratio/low_min": 8.257147419499233e-05, "clip_ratio/region_mean": 0.0013094537243887316, "epoch": 0.04059989850025375, "grad_norm": 0.1288425475358963, "learning_rate": 2e-07, "loss": 0.0627, "step": 435 }, { "clip_ratio/high_max": 0.00198351360813831, "clip_ratio/high_mean": 0.0008419586083618924, "clip_ratio/low_mean": 0.000520566155501001, "clip_ratio/low_min": 1.3493091501004528e-05, "clip_ratio/region_mean": 0.0013625247520394623, "epoch": 0.040693231600254336, "grad_norm": 0.12404875457286835, "learning_rate": 2e-07, "loss": -0.0074, "step": 436 }, { "clip_ratio/high_max": 0.002239898378320504, "clip_ratio/high_mean": 0.0009122826268139761, "clip_ratio/low_mean": 0.00048747577056929003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013997583810123615, "epoch": 0.040786564700254914, "grad_norm": 0.11860135942697525, "learning_rate": 2e-07, "loss": -0.0181, "step": 437 }, { "clip_ratio/high_max": 0.0019366519190953113, "clip_ratio/high_mean": 0.0007867393214837648, "clip_ratio/low_mean": 0.0006420954059649375, "clip_ratio/low_min": 7.760135940770851e-05, "clip_ratio/region_mean": 0.001428834737453144, "epoch": 0.0408798978002555, "grad_norm": 0.13247065246105194, "learning_rate": 2e-07, "loss": 0.0136, "step": 438 }, { "clip_ratio/high_max": 0.0017299235769314691, "clip_ratio/high_mean": 0.0007306657516892301, "clip_ratio/low_mean": 0.000673707900205045, "clip_ratio/low_min": 8.215314664994366e-05, "clip_ratio/region_mean": 0.0014043736664461903, "epoch": 0.040973230900256084, "grad_norm": 0.124836266040802, "learning_rate": 2e-07, "loss": 0.0509, "step": 439 }, { "clip_ratio/high_max": 0.0019744774835999124, "clip_ratio/high_mean": 0.0007914960115158465, "clip_ratio/low_mean": 0.0005792353731521871, "clip_ratio/low_min": 7.542397679571877e-05, "clip_ratio/region_mean": 0.0013707314028579276, "epoch": 0.04106656400025666, "grad_norm": 0.11174217611551285, "learning_rate": 2e-07, "loss": 0.0532, "step": 440 }, { "clip_ratio/high_max": 0.0020874797010037582, "clip_ratio/high_mean": 0.0008413058922087657, "clip_ratio/low_mean": 0.0006269640816753963, "clip_ratio/low_min": 3.314004925414338e-05, "clip_ratio/region_mean": 0.0014682699766126461, "epoch": 0.04115989710025725, "grad_norm": 0.12802769243717194, "learning_rate": 2e-07, "loss": 0.0781, "step": 441 }, { "clip_ratio/high_max": 0.0018837543611880392, "clip_ratio/high_mean": 0.0007623668952874141, "clip_ratio/low_mean": 0.0005598945790552534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013222614979895297, "epoch": 0.04125323020025783, "grad_norm": 0.12944675981998444, "learning_rate": 2e-07, "loss": 0.0376, "step": 442 }, { "clip_ratio/high_max": 0.0019327346344653051, "clip_ratio/high_mean": 0.0008591074183641467, "clip_ratio/low_mean": 0.0006016092311256216, "clip_ratio/low_min": 5.037430673837662e-05, "clip_ratio/region_mean": 0.00146071665949421, "epoch": 0.04134656330025842, "grad_norm": 0.12969927489757538, "learning_rate": 2e-07, "loss": -0.0024, "step": 443 }, { "clip_ratio/high_max": 0.001821817520976765, "clip_ratio/high_mean": 0.0007717122698522871, "clip_ratio/low_mean": 0.000641424101559096, "clip_ratio/low_min": 4.92533117721905e-05, "clip_ratio/region_mean": 0.0014131363968772348, "epoch": 0.041439896400258996, "grad_norm": 0.12749607861042023, "learning_rate": 2e-07, "loss": 0.0339, "step": 444 }, { "clip_ratio/high_max": 0.001941720984177664, "clip_ratio/high_mean": 0.0008374235530936858, "clip_ratio/low_mean": 0.0005205985971770133, "clip_ratio/low_min": 2.4299011784023605e-05, "clip_ratio/region_mean": 0.0013580221602751408, "epoch": 0.04153322950025958, "grad_norm": 0.13302521407604218, "learning_rate": 2e-07, "loss": 0.035, "step": 445 }, { "clip_ratio/high_max": 0.0020225475091137923, "clip_ratio/high_mean": 0.0007231357540149475, "clip_ratio/low_mean": 0.0005122848124301527, "clip_ratio/low_min": 4.80782582599204e-05, "clip_ratio/region_mean": 0.001235420579178026, "epoch": 0.04162656260026017, "grad_norm": 0.11125293374061584, "learning_rate": 2e-07, "loss": 0.0232, "step": 446 }, { "clip_ratio/high_max": 0.00214793391933199, "clip_ratio/high_mean": 0.0009180108863802161, "clip_ratio/low_mean": 0.0005739794632972917, "clip_ratio/low_min": 2.6883111786446534e-05, "clip_ratio/region_mean": 0.0014919903333066031, "epoch": 0.04171989570026075, "grad_norm": 0.1331784725189209, "learning_rate": 2e-07, "loss": -0.0276, "step": 447 }, { "clip_ratio/high_max": 0.0019525597926985938, "clip_ratio/high_mean": 0.0008614203106844798, "clip_ratio/low_mean": 0.0005691061542165698, "clip_ratio/low_min": 4.080146027263254e-05, "clip_ratio/region_mean": 0.001430526466720039, "epoch": 0.04181322880026133, "grad_norm": 0.11646457016468048, "learning_rate": 2e-07, "loss": -0.0075, "step": 448 }, { "clip_ratio/high_max": 0.0016937920481723268, "clip_ratio/high_mean": 0.0007698446988797514, "clip_ratio/low_mean": 0.000697033019605442, "clip_ratio/low_min": 9.306134415965062e-06, "clip_ratio/region_mean": 0.001466877707571257, "epoch": 0.041906561900261916, "grad_norm": 0.11478401720523834, "learning_rate": 2e-07, "loss": 0.0131, "step": 449 }, { "clip_ratio/high_max": 0.0016763952407927718, "clip_ratio/high_mean": 0.0007130775848054327, "clip_ratio/low_mean": 0.0005509589773282642, "clip_ratio/low_min": 1.3105473044561222e-05, "clip_ratio/region_mean": 0.001264036549400771, "epoch": 0.0419998950002625, "grad_norm": 0.12680761516094208, "learning_rate": 2e-07, "loss": 0.0403, "step": 450 }, { "clip_ratio/high_max": 0.0018596082627482247, "clip_ratio/high_mean": 0.0008023961645449162, "clip_ratio/low_mean": 0.0006026724131515948, "clip_ratio/low_min": 3.8710002627340145e-05, "clip_ratio/region_mean": 0.0014050685858819634, "epoch": 0.042093228100263086, "grad_norm": 0.13138644397258759, "learning_rate": 2e-07, "loss": 0.0515, "step": 451 }, { "clip_ratio/high_max": 0.0014278760972956661, "clip_ratio/high_mean": 0.000624245611106744, "clip_ratio/low_mean": 0.0007028806039670599, "clip_ratio/low_min": 8.194201291189529e-05, "clip_ratio/region_mean": 0.0013271262214402668, "epoch": 0.042186561200263664, "grad_norm": 0.1271778643131256, "learning_rate": 2e-07, "loss": 0.0911, "step": 452 }, { "clip_ratio/high_max": 0.002128368207195308, "clip_ratio/high_mean": 0.0008931682132242713, "clip_ratio/low_mean": 0.0006133725819381652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001506540778791532, "epoch": 0.04227989430026425, "grad_norm": 0.12002625316381454, "learning_rate": 2e-07, "loss": 0.0447, "step": 453 }, { "clip_ratio/high_max": 0.0016965694958344102, "clip_ratio/high_mean": 0.0007292480495380005, "clip_ratio/low_mean": 0.0006118449573477847, "clip_ratio/low_min": 4.237728899170179e-05, "clip_ratio/region_mean": 0.0013410930259851739, "epoch": 0.042373227400264835, "grad_norm": 0.1293649524450302, "learning_rate": 2e-07, "loss": -0.0072, "step": 454 }, { "clip_ratio/high_max": 0.001799706464225892, "clip_ratio/high_mean": 0.0007703606115683215, "clip_ratio/low_mean": 0.0006758462677680654, "clip_ratio/low_min": 5.7804901189228985e-05, "clip_ratio/region_mean": 0.0014462068756984081, "epoch": 0.04246656050026541, "grad_norm": 0.12729597091674805, "learning_rate": 2e-07, "loss": 0.0361, "step": 455 }, { "clip_ratio/high_max": 0.002166321442928165, "clip_ratio/high_mean": 0.0008543400144844782, "clip_ratio/low_mean": 0.0006130776746431366, "clip_ratio/low_min": 3.2258063583867624e-05, "clip_ratio/region_mean": 0.0014674177145934664, "epoch": 0.042559893600266, "grad_norm": 0.13109572231769562, "learning_rate": 2e-07, "loss": 0.0448, "step": 456 }, { "clip_ratio/high_max": 0.0021004257214372046, "clip_ratio/high_mean": 0.0008129820271278732, "clip_ratio/low_mean": 0.0005972016897430876, "clip_ratio/low_min": 2.0955032596248202e-05, "clip_ratio/region_mean": 0.0014101837623456959, "epoch": 0.04265322670026658, "grad_norm": 0.11769860982894897, "learning_rate": 2e-07, "loss": 0.0177, "step": 457 }, { "clip_ratio/high_max": 0.0017277467231906485, "clip_ratio/high_mean": 0.0007483531317120651, "clip_ratio/low_mean": 0.0005988680877635488, "clip_ratio/low_min": 1.4288980310084298e-05, "clip_ratio/region_mean": 0.0013472212121996563, "epoch": 0.04274655980026717, "grad_norm": 0.12136774510145187, "learning_rate": 2e-07, "loss": 0.0323, "step": 458 }, { "clip_ratio/high_max": 0.0019920104241464287, "clip_ratio/high_mean": 0.0008281200898636598, "clip_ratio/low_mean": 0.0006895135193190072, "clip_ratio/low_min": 4.762592197948834e-05, "clip_ratio/region_mean": 0.001517633609182667, "epoch": 0.04283989290026775, "grad_norm": 0.12881548702716827, "learning_rate": 2e-07, "loss": 0.0369, "step": 459 }, { "clip_ratio/high_max": 0.0018852931025321595, "clip_ratio/high_mean": 0.0008347364746441599, "clip_ratio/low_mean": 0.000606104918915662, "clip_ratio/low_min": 1.300457734032534e-05, "clip_ratio/region_mean": 0.0014408413953788113, "epoch": 0.04293322600026833, "grad_norm": 0.126173734664917, "learning_rate": 2e-07, "loss": 0.0458, "step": 460 }, { "clip_ratio/high_max": 0.0019430297397775576, "clip_ratio/high_mean": 0.0007843109597160947, "clip_ratio/low_mean": 0.0005729915283154696, "clip_ratio/low_min": 2.122961996064987e-05, "clip_ratio/region_mean": 0.0013573024698416702, "epoch": 0.04302655910026892, "grad_norm": 0.11492270231246948, "learning_rate": 2e-07, "loss": 0.0278, "step": 461 }, { "clip_ratio/high_max": 0.0019511612335918471, "clip_ratio/high_mean": 0.0007482991186407162, "clip_ratio/low_mean": 0.0005886743383598514, "clip_ratio/low_min": 2.108345324813854e-05, "clip_ratio/region_mean": 0.0013369734551815782, "epoch": 0.0431198922002695, "grad_norm": 0.13352049887180328, "learning_rate": 2e-07, "loss": 0.0189, "step": 462 }, { "clip_ratio/high_max": 0.002021753120061476, "clip_ratio/high_mean": 0.0007751320263196249, "clip_ratio/low_mean": 0.0006295952043728903, "clip_ratio/low_min": 8.166732186509762e-06, "clip_ratio/region_mean": 0.001404727234330494, "epoch": 0.04321322530027008, "grad_norm": 0.12256651371717453, "learning_rate": 2e-07, "loss": 0.0199, "step": 463 }, { "clip_ratio/high_max": 0.002125372953742044, "clip_ratio/high_mean": 0.0008464869752060622, "clip_ratio/low_mean": 0.0005628256531053921, "clip_ratio/low_min": 4.4712321141560096e-05, "clip_ratio/region_mean": 0.001409312626492465, "epoch": 0.043306558400270666, "grad_norm": 0.1265394240617752, "learning_rate": 2e-07, "loss": 0.0051, "step": 464 }, { "clip_ratio/high_max": 0.002220640206360258, "clip_ratio/high_mean": 0.0008117698871501489, "clip_ratio/low_mean": 0.0006259317251533503, "clip_ratio/low_min": 2.27531127166003e-05, "clip_ratio/region_mean": 0.0014377016232174356, "epoch": 0.04339989150027125, "grad_norm": 0.12159605324268341, "learning_rate": 2e-07, "loss": 0.0544, "step": 465 }, { "clip_ratio/high_max": 0.001920515984238591, "clip_ratio/high_mean": 0.0007843609191695577, "clip_ratio/low_mean": 0.0006304565013124375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014148173941066489, "epoch": 0.04349322460027183, "grad_norm": 0.13625863194465637, "learning_rate": 2e-07, "loss": 0.0622, "step": 466 }, { "clip_ratio/high_max": 0.0020645483091357164, "clip_ratio/high_mean": 0.0008827263627608772, "clip_ratio/low_mean": 0.0006521402965518064, "clip_ratio/low_min": 3.399605520826299e-05, "clip_ratio/region_mean": 0.0015348666638601571, "epoch": 0.043586557700272414, "grad_norm": 0.12838351726531982, "learning_rate": 2e-07, "loss": 0.0022, "step": 467 }, { "clip_ratio/high_max": 0.0017020577724906616, "clip_ratio/high_mean": 0.0006898002202433418, "clip_ratio/low_mean": 0.0006109080713940784, "clip_ratio/low_min": 3.2897947676246986e-05, "clip_ratio/region_mean": 0.001300708288908936, "epoch": 0.043679890800273, "grad_norm": 0.12890274822711945, "learning_rate": 2e-07, "loss": 0.0334, "step": 468 }, { "clip_ratio/high_max": 0.001804167539376067, "clip_ratio/high_mean": 0.0008270563594123814, "clip_ratio/low_mean": 0.0006516645971714752, "clip_ratio/low_min": 5.964106367173372e-05, "clip_ratio/region_mean": 0.001478720940212952, "epoch": 0.043773223900273585, "grad_norm": 0.13219425082206726, "learning_rate": 2e-07, "loss": 0.0302, "step": 469 }, { "clip_ratio/high_max": 0.00205077524879016, "clip_ratio/high_mean": 0.0007775441445119213, "clip_ratio/low_mean": 0.0005900697451579617, "clip_ratio/low_min": 2.8003352781524882e-05, "clip_ratio/region_mean": 0.0013676139060407877, "epoch": 0.04386655700027416, "grad_norm": 0.12591680884361267, "learning_rate": 2e-07, "loss": 0.0352, "step": 470 }, { "clip_ratio/high_max": 0.001639964582864195, "clip_ratio/high_mean": 0.0007065572135616094, "clip_ratio/low_mean": 0.0006671074734185822, "clip_ratio/low_min": 8.493865789205302e-05, "clip_ratio/region_mean": 0.001373664716084022, "epoch": 0.04395989010027475, "grad_norm": 0.12724579870700836, "learning_rate": 2e-07, "loss": 0.0751, "step": 471 }, { "clip_ratio/high_max": 0.0019803974428214133, "clip_ratio/high_mean": 0.0007915250698715681, "clip_ratio/low_mean": 0.0006204102937772404, "clip_ratio/low_min": 3.233541428926401e-05, "clip_ratio/region_mean": 0.001411935365467798, "epoch": 0.044053223200275334, "grad_norm": 0.12191016227006912, "learning_rate": 2e-07, "loss": 0.0444, "step": 472 }, { "clip_ratio/high_max": 0.002042929372692015, "clip_ratio/high_mean": 0.0009011866104629007, "clip_ratio/low_mean": 0.0005229233393038157, "clip_ratio/low_min": 2.307359318365343e-05, "clip_ratio/region_mean": 0.0014241099415812641, "epoch": 0.04414655630027592, "grad_norm": 0.12842029333114624, "learning_rate": 2e-07, "loss": -0.0266, "step": 473 }, { "clip_ratio/high_max": 0.0021136288487468846, "clip_ratio/high_mean": 0.0007393581581709441, "clip_ratio/low_mean": 0.0005775156187155517, "clip_ratio/low_min": 2.6750150027510244e-05, "clip_ratio/region_mean": 0.0013168737896194216, "epoch": 0.0442398894002765, "grad_norm": 0.12079551070928574, "learning_rate": 2e-07, "loss": 0.0465, "step": 474 }, { "clip_ratio/high_max": 0.001752253936501802, "clip_ratio/high_mean": 0.0007918350561340048, "clip_ratio/low_mean": 0.0005751688222517259, "clip_ratio/low_min": 1.2891914593637921e-05, "clip_ratio/region_mean": 0.0013670038679265417, "epoch": 0.04433322250027708, "grad_norm": 0.12179074436426163, "learning_rate": 2e-07, "loss": 0.0168, "step": 475 }, { "clip_ratio/high_max": 0.0018314347471459769, "clip_ratio/high_mean": 0.0007210937892523361, "clip_ratio/low_mean": 0.000608924852713244, "clip_ratio/low_min": 3.547241522028344e-05, "clip_ratio/region_mean": 0.0013300186074047815, "epoch": 0.04442655560027767, "grad_norm": 0.13796447217464447, "learning_rate": 2e-07, "loss": 0.022, "step": 476 }, { "clip_ratio/high_max": 0.001912835294206161, "clip_ratio/high_mean": 0.0007472723336832132, "clip_ratio/low_mean": 0.0006029669657436898, "clip_ratio/low_min": 2.3364485969068483e-05, "clip_ratio/region_mean": 0.001350239344901638, "epoch": 0.04451988870027825, "grad_norm": 0.1278499811887741, "learning_rate": 2e-07, "loss": 0.015, "step": 477 }, { "clip_ratio/high_max": 0.0021084704567329027, "clip_ratio/high_mean": 0.0007543496121797943, "clip_ratio/low_mean": 0.0006371656218107091, "clip_ratio/low_min": 1.177246213046601e-05, "clip_ratio/region_mean": 0.0013915152194385882, "epoch": 0.04461322180027883, "grad_norm": 0.12245044857263565, "learning_rate": 2e-07, "loss": 0.0325, "step": 478 }, { "clip_ratio/high_max": 0.0019993680471088737, "clip_ratio/high_mean": 0.0008241827636084054, "clip_ratio/low_mean": 0.000601893492785166, "clip_ratio/low_min": 3.5972205296275206e-05, "clip_ratio/region_mean": 0.0014260762509366032, "epoch": 0.044706554900279416, "grad_norm": 0.1255422979593277, "learning_rate": 2e-07, "loss": -0.0137, "step": 479 }, { "clip_ratio/high_max": 0.0018301896343473345, "clip_ratio/high_mean": 0.000773581545217894, "clip_ratio/low_mean": 0.0005611393589788349, "clip_ratio/low_min": 2.8306192689342424e-05, "clip_ratio/region_mean": 0.001334720913291676, "epoch": 0.04479988800028, "grad_norm": 0.11499013006687164, "learning_rate": 2e-07, "loss": 0.0049, "step": 480 }, { "clip_ratio/high_max": 0.0018726084963418543, "clip_ratio/high_mean": 0.0007928112136141863, "clip_ratio/low_mean": 0.0006376321052812273, "clip_ratio/low_min": 3.402401307539549e-05, "clip_ratio/region_mean": 0.0014304432916105725, "epoch": 0.04489322110028058, "grad_norm": 0.12581494450569153, "learning_rate": 2e-07, "loss": 0.0433, "step": 481 }, { "clip_ratio/high_max": 0.0018266648694407195, "clip_ratio/high_mean": 0.0007048752722766949, "clip_ratio/low_mean": 0.000612429848388274, "clip_ratio/low_min": 4.617186277755536e-05, "clip_ratio/region_mean": 0.0013173050938348752, "epoch": 0.044986554200281165, "grad_norm": 0.11227978765964508, "learning_rate": 2e-07, "loss": 0.013, "step": 482 }, { "clip_ratio/high_max": 0.002026835409196792, "clip_ratio/high_mean": 0.0008249577622336801, "clip_ratio/low_mean": 0.000664360634800687, "clip_ratio/low_min": 5.187642636883538e-05, "clip_ratio/region_mean": 0.0014893184088577982, "epoch": 0.04507988730028175, "grad_norm": 0.13401319086551666, "learning_rate": 2e-07, "loss": 0.0283, "step": 483 }, { "clip_ratio/high_max": 0.00200418126405566, "clip_ratio/high_mean": 0.0007258249979713582, "clip_ratio/low_mean": 0.0006042447566869669, "clip_ratio/low_min": 1.4137072867015377e-05, "clip_ratio/region_mean": 0.0013300697755767033, "epoch": 0.045173220400282335, "grad_norm": 0.1273982673883438, "learning_rate": 2e-07, "loss": 0.0032, "step": 484 }, { "clip_ratio/high_max": 0.0019962828628194984, "clip_ratio/high_mean": 0.0007988323086465243, "clip_ratio/low_mean": 0.000614897369814571, "clip_ratio/low_min": 6.21268527538632e-05, "clip_ratio/region_mean": 0.0014137296966509894, "epoch": 0.04526655350028291, "grad_norm": 0.11907685548067093, "learning_rate": 2e-07, "loss": 0.0212, "step": 485 }, { "clip_ratio/high_max": 0.002219936446635984, "clip_ratio/high_mean": 0.0008455584866169374, "clip_ratio/low_mean": 0.0006325342674244894, "clip_ratio/low_min": 3.939645739592379e-05, "clip_ratio/region_mean": 0.0014780927376705222, "epoch": 0.0453598866002835, "grad_norm": 0.13313397765159607, "learning_rate": 2e-07, "loss": 0.0277, "step": 486 }, { "clip_ratio/high_max": 0.0018361723159614485, "clip_ratio/high_mean": 0.0008283569550258107, "clip_ratio/low_mean": 0.0005664485033776145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013948054547654465, "epoch": 0.045453219700284084, "grad_norm": 0.13463589549064636, "learning_rate": 2e-07, "loss": -0.0271, "step": 487 }, { "clip_ratio/high_max": 0.0019077291108260397, "clip_ratio/high_mean": 0.0007643712706340011, "clip_ratio/low_mean": 0.0006722425578118418, "clip_ratio/low_min": 2.313957884325646e-05, "clip_ratio/region_mean": 0.0014366138166224118, "epoch": 0.04554655280028467, "grad_norm": 0.12350380420684814, "learning_rate": 2e-07, "loss": 0.0377, "step": 488 }, { "clip_ratio/high_max": 0.001878851544461213, "clip_ratio/high_mean": 0.0007357447684626095, "clip_ratio/low_mean": 0.0006696215423289686, "clip_ratio/low_min": 1.1759172593883704e-05, "clip_ratio/region_mean": 0.0014053662998776417, "epoch": 0.04563988590028525, "grad_norm": 0.12656685709953308, "learning_rate": 2e-07, "loss": 0.0696, "step": 489 }, { "clip_ratio/high_max": 0.002309049385075923, "clip_ratio/high_mean": 0.0008815994933684124, "clip_ratio/low_mean": 0.0006200631687534042, "clip_ratio/low_min": 5.387371584220091e-05, "clip_ratio/region_mean": 0.0015016626712167636, "epoch": 0.04573321900028583, "grad_norm": 0.12792600691318512, "learning_rate": 2e-07, "loss": 0.0118, "step": 490 }, { "clip_ratio/high_max": 0.001952918028109707, "clip_ratio/high_mean": 0.0007988641318661394, "clip_ratio/low_mean": 0.0006136745596450055, "clip_ratio/low_min": 3.3151954994536936e-05, "clip_ratio/region_mean": 0.0014125387242529541, "epoch": 0.04582655210028642, "grad_norm": 0.1305781602859497, "learning_rate": 2e-07, "loss": 0.0059, "step": 491 }, { "clip_ratio/high_max": 0.0018082264323311392, "clip_ratio/high_mean": 0.0007484291072614724, "clip_ratio/low_mean": 0.0007289647001016419, "clip_ratio/low_min": 9.058684463525424e-05, "clip_ratio/region_mean": 0.0014773938310099766, "epoch": 0.045919885200286996, "grad_norm": 0.12669751048088074, "learning_rate": 2e-07, "loss": 0.0522, "step": 492 }, { "clip_ratio/high_max": 0.00223208568058908, "clip_ratio/high_mean": 0.0010526750411372632, "clip_ratio/low_mean": 0.0006104895664975629, "clip_ratio/low_min": 1.1200716471648775e-05, "clip_ratio/region_mean": 0.0016631646431051195, "epoch": 0.04601321830028758, "grad_norm": 0.17110952734947205, "learning_rate": 2e-07, "loss": -0.0142, "step": 493 }, { "clip_ratio/high_max": 0.0019754916247620713, "clip_ratio/high_mean": 0.000790720036093262, "clip_ratio/low_mean": 0.000616987883404363, "clip_ratio/low_min": 2.3951964976731688e-05, "clip_ratio/region_mean": 0.0014077079031267203, "epoch": 0.046106551400288166, "grad_norm": 0.12436029314994812, "learning_rate": 2e-07, "loss": 0.026, "step": 494 }, { "clip_ratio/high_max": 0.0019275807462690864, "clip_ratio/high_mean": 0.000853837718750583, "clip_ratio/low_mean": 0.0006897648054291494, "clip_ratio/low_min": 4.1779740968195256e-05, "clip_ratio/region_mean": 0.001543602513265796, "epoch": 0.04619988450028875, "grad_norm": 0.12672999501228333, "learning_rate": 2e-07, "loss": 0.0268, "step": 495 }, { "clip_ratio/high_max": 0.002022351596679073, "clip_ratio/high_mean": 0.0008107790399662917, "clip_ratio/low_mean": 0.0005935219105595024, "clip_ratio/low_min": 4.6087600367172854e-05, "clip_ratio/region_mean": 0.0014043009650777094, "epoch": 0.04629321760028933, "grad_norm": 0.13803862035274506, "learning_rate": 2e-07, "loss": 0.0553, "step": 496 }, { "clip_ratio/high_max": 0.0022692137499689125, "clip_ratio/high_mean": 0.0008730502522666939, "clip_ratio/low_mean": 0.0006377903373504523, "clip_ratio/low_min": 3.6505057323665824e-05, "clip_ratio/region_mean": 0.001510840593255125, "epoch": 0.046386550700289915, "grad_norm": 0.11997704207897186, "learning_rate": 2e-07, "loss": 0.0386, "step": 497 }, { "clip_ratio/high_max": 0.0017912539587996434, "clip_ratio/high_mean": 0.0007183847937994869, "clip_ratio/low_mean": 0.0005708549106202554, "clip_ratio/low_min": 5.4810121582704596e-05, "clip_ratio/region_mean": 0.0012892397171526682, "epoch": 0.0464798838002905, "grad_norm": 0.13851787149906158, "learning_rate": 2e-07, "loss": -0.007, "step": 498 }, { "clip_ratio/high_max": 0.002143704092304688, "clip_ratio/high_mean": 0.0008559335401514545, "clip_ratio/low_mean": 0.0006546061895278399, "clip_ratio/low_min": 5.927369147684658e-05, "clip_ratio/region_mean": 0.0015105397360457573, "epoch": 0.046573216900291085, "grad_norm": 0.12724709510803223, "learning_rate": 2e-07, "loss": 0.0162, "step": 499 }, { "clip_ratio/high_max": 0.0018135844547941815, "clip_ratio/high_mean": 0.0008107461299005081, "clip_ratio/low_mean": 0.0005898757226532325, "clip_ratio/low_min": 2.2519459889736027e-05, "clip_ratio/region_mean": 0.0014006218661961611, "epoch": 0.046666550000291664, "grad_norm": 0.12625931203365326, "learning_rate": 2e-07, "loss": 0.0485, "step": 500 }, { "clip_ratio/high_max": 0.0018712252604018431, "clip_ratio/high_mean": 0.0008269804593510344, "clip_ratio/low_mean": 0.0006532924689963693, "clip_ratio/low_min": 1.26237127915374e-05, "clip_ratio/region_mean": 0.0014802729310758878, "epoch": 0.04675988310029225, "grad_norm": 0.1275644749403, "learning_rate": 2e-07, "loss": -0.0084, "step": 501 }, { "clip_ratio/high_max": 0.0021732505156251136, "clip_ratio/high_mean": 0.0008898263495211722, "clip_ratio/low_mean": 0.0006084626156734885, "clip_ratio/low_min": 2.1192829990468454e-05, "clip_ratio/region_mean": 0.0014982889697421342, "epoch": 0.046853216200292834, "grad_norm": 0.14466458559036255, "learning_rate": 2e-07, "loss": 0.0107, "step": 502 }, { "clip_ratio/high_max": 0.0020997343817725778, "clip_ratio/high_mean": 0.0008563275205233367, "clip_ratio/low_mean": 0.0006268360793910688, "clip_ratio/low_min": 7.52789687794575e-05, "clip_ratio/region_mean": 0.0014831635926384479, "epoch": 0.04694654930029342, "grad_norm": 0.12120368331670761, "learning_rate": 2e-07, "loss": 0.0222, "step": 503 }, { "clip_ratio/high_max": 0.0020162120927125216, "clip_ratio/high_mean": 0.0009255284076061798, "clip_ratio/low_mean": 0.0006706384956487454, "clip_ratio/low_min": 2.7927195333177224e-05, "clip_ratio/region_mean": 0.0015961669123498723, "epoch": 0.047039882400294, "grad_norm": 0.12308359146118164, "learning_rate": 2e-07, "loss": -0.0094, "step": 504 }, { "clip_ratio/high_max": 0.0023234796535689384, "clip_ratio/high_mean": 0.000791755930549698, "clip_ratio/low_mean": 0.0006560724268638296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014478283555945382, "epoch": 0.04713321550029458, "grad_norm": 0.13322095572948456, "learning_rate": 2e-07, "loss": 0.0711, "step": 505 }, { "clip_ratio/high_max": 0.0017675724739092402, "clip_ratio/high_mean": 0.0007076291722114547, "clip_ratio/low_mean": 0.0006863769012852572, "clip_ratio/low_min": 8.11994168543606e-05, "clip_ratio/region_mean": 0.0013940060598542914, "epoch": 0.04722654860029517, "grad_norm": 0.12100698053836823, "learning_rate": 2e-07, "loss": 0.0464, "step": 506 }, { "clip_ratio/high_max": 0.0021689559798687696, "clip_ratio/high_mean": 0.0008927038761612494, "clip_ratio/low_mean": 0.0006382088777172612, "clip_ratio/low_min": 3.605661368055735e-05, "clip_ratio/region_mean": 0.0015309127411455847, "epoch": 0.047319881700295746, "grad_norm": 0.13588377833366394, "learning_rate": 2e-07, "loss": 0.0159, "step": 507 }, { "clip_ratio/high_max": 0.002147663697542157, "clip_ratio/high_mean": 0.0008934457691793796, "clip_ratio/low_mean": 0.0006903692901687464, "clip_ratio/low_min": 6.498480297523201e-05, "clip_ratio/region_mean": 0.0015838150793570094, "epoch": 0.04741321480029633, "grad_norm": 0.14163455367088318, "learning_rate": 2e-07, "loss": 0.0405, "step": 508 }, { "clip_ratio/high_max": 0.0017830312644946389, "clip_ratio/high_mean": 0.0007750870954623679, "clip_ratio/low_mean": 0.0005964169172330003, "clip_ratio/low_min": 5.2187920118740294e-05, "clip_ratio/region_mean": 0.001371504014969105, "epoch": 0.04750654790029692, "grad_norm": 0.12221848219633102, "learning_rate": 2e-07, "loss": 0.0046, "step": 509 }, { "clip_ratio/high_max": 0.002102970647683833, "clip_ratio/high_mean": 0.0008855807791405823, "clip_ratio/low_mean": 0.0007151324825827032, "clip_ratio/low_min": 9.362271612189943e-05, "clip_ratio/region_mean": 0.0016007132217055187, "epoch": 0.0475998810002975, "grad_norm": 0.1492527425289154, "learning_rate": 2e-07, "loss": 0.021, "step": 510 }, { "clip_ratio/high_max": 0.0019890679941454437, "clip_ratio/high_mean": 0.0007588792395836208, "clip_ratio/low_mean": 0.0006563334909515106, "clip_ratio/low_min": 7.98090077296365e-05, "clip_ratio/region_mean": 0.0014152127405395731, "epoch": 0.04769321410029808, "grad_norm": 0.13701818883419037, "learning_rate": 2e-07, "loss": 0.0621, "step": 511 }, { "clip_ratio/high_max": 0.002095494943205267, "clip_ratio/high_mean": 0.0007698805056861602, "clip_ratio/low_mean": 0.0006854003895568894, "clip_ratio/low_min": 8.843113027978688e-05, "clip_ratio/region_mean": 0.001455280838854378, "epoch": 0.047786547200298665, "grad_norm": 0.11331798881292343, "learning_rate": 2e-07, "loss": 0.0215, "step": 512 }, { "clip_ratio/high_max": 0.001661100690398598, "clip_ratio/high_mean": 0.0006640887950197794, "clip_ratio/low_mean": 0.0005831441058035125, "clip_ratio/low_min": 6.20927876298083e-05, "clip_ratio/region_mean": 0.00124723292174167, "completions/clipped_ratio": 0.0172206333705357, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 652.1428833007812, "completions/mean_terminated_length": 591.7982788085938, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.04787988030029925, "grad_norm": 0.11295594274997711, "learning_rate": 2e-07, "loss": 0.0083, "num_tokens": 415518197.0, "reward": 0.5720999836921692, "reward_std": 0.19469255208969116, "rewards/simpleverify_reward/mean": 0.5720999836921692, "rewards/simpleverify_reward/std": 0.4947764575481415, "step": 513 }, { "clip_ratio/high_max": 0.0017237266001757234, "clip_ratio/high_mean": 0.0005975122312520398, "clip_ratio/low_mean": 0.000616979179540067, "clip_ratio/low_min": 1.0918937732640188e-05, "clip_ratio/region_mean": 0.0012144914289820008, "epoch": 0.047973213400299836, "grad_norm": 0.1252632439136505, "learning_rate": 2e-07, "loss": 0.0313, "step": 514 }, { "clip_ratio/high_max": 0.0017588489426998422, "clip_ratio/high_mean": 0.0006889515407237923, "clip_ratio/low_mean": 0.0005534208667086205, "clip_ratio/low_min": 3.1923004826239776e-05, "clip_ratio/region_mean": 0.001242372381966561, "epoch": 0.048066546500300414, "grad_norm": 0.1317816972732544, "learning_rate": 2e-07, "loss": 0.0252, "step": 515 }, { "clip_ratio/high_max": 0.001726904469251167, "clip_ratio/high_mean": 0.0006845847510703607, "clip_ratio/low_mean": 0.0005103389612486353, "clip_ratio/low_min": 2.303974179085344e-05, "clip_ratio/region_mean": 0.0011949236977670807, "epoch": 0.048159879600301, "grad_norm": 0.10822305828332901, "learning_rate": 2e-07, "loss": 0.0156, "step": 516 }, { "clip_ratio/high_max": 0.0015383017744170502, "clip_ratio/high_mean": 0.000589987213970744, "clip_ratio/low_mean": 0.0005662164348905208, "clip_ratio/low_min": 4.35765632573748e-05, "clip_ratio/region_mean": 0.0011562036561372224, "epoch": 0.048253212700301584, "grad_norm": 0.12652388215065002, "learning_rate": 2e-07, "loss": 0.0794, "step": 517 }, { "clip_ratio/high_max": 0.001607385140232509, "clip_ratio/high_mean": 0.0006812076535425149, "clip_ratio/low_mean": 0.0005548001608985942, "clip_ratio/low_min": 9.63701731961919e-05, "clip_ratio/region_mean": 0.0012360078035271727, "epoch": 0.04834654580030216, "grad_norm": 0.1202060654759407, "learning_rate": 2e-07, "loss": 0.0507, "step": 518 }, { "clip_ratio/high_max": 0.0014725189357704949, "clip_ratio/high_mean": 0.0006108946126914816, "clip_ratio/low_mean": 0.0005467043720273068, "clip_ratio/low_min": 5.570644498220645e-05, "clip_ratio/region_mean": 0.001157598991994746, "epoch": 0.04843987890030275, "grad_norm": 0.10508166253566742, "learning_rate": 2e-07, "loss": 0.0384, "step": 519 }, { "clip_ratio/high_max": 0.0015348132474173326, "clip_ratio/high_mean": 0.000690805882186396, "clip_ratio/low_mean": 0.0006186803429955035, "clip_ratio/low_min": 1.8996959624928422e-05, "clip_ratio/region_mean": 0.0013094862370053306, "epoch": 0.04853321200030333, "grad_norm": 0.12275245040655136, "learning_rate": 2e-07, "loss": 0.0352, "step": 520 }, { "clip_ratio/high_max": 0.0016642865593894385, "clip_ratio/high_mean": 0.0006561715599673335, "clip_ratio/low_mean": 0.00048237445480481256, "clip_ratio/low_min": 2.0643663447117433e-05, "clip_ratio/region_mean": 0.0011385460202291142, "epoch": 0.04862654510030392, "grad_norm": 0.10232160985469818, "learning_rate": 2e-07, "loss": 0.0025, "step": 521 }, { "clip_ratio/high_max": 0.002190479352066177, "clip_ratio/high_mean": 0.0007919492927612737, "clip_ratio/low_mean": 0.0005792472438770346, "clip_ratio/low_min": 3.406218002055539e-05, "clip_ratio/region_mean": 0.0013711965366383083, "epoch": 0.048719878200304496, "grad_norm": 0.12254737317562103, "learning_rate": 2e-07, "loss": 0.023, "step": 522 }, { "clip_ratio/high_max": 0.002001780132559361, "clip_ratio/high_mean": 0.0007607927418575855, "clip_ratio/low_mean": 0.0006321199807644007, "clip_ratio/low_min": 6.249587386264466e-05, "clip_ratio/region_mean": 0.001392912738083396, "epoch": 0.04881321130030508, "grad_norm": 0.13201932609081268, "learning_rate": 2e-07, "loss": 0.0609, "step": 523 }, { "clip_ratio/high_max": 0.0016624363634036854, "clip_ratio/high_mean": 0.0006359516473821714, "clip_ratio/low_mean": 0.0005255334808680345, "clip_ratio/low_min": 6.80784614814911e-05, "clip_ratio/region_mean": 0.0011614851209742483, "epoch": 0.04890654440030567, "grad_norm": 0.12014492601156235, "learning_rate": 2e-07, "loss": 0.0531, "step": 524 }, { "clip_ratio/high_max": 0.0016420011743321083, "clip_ratio/high_mean": 0.0007468117182725109, "clip_ratio/low_mean": 0.000538583000889048, "clip_ratio/low_min": 1.2393416909617372e-05, "clip_ratio/region_mean": 0.0012853947337134741, "epoch": 0.04899987750030625, "grad_norm": 0.12996400892734528, "learning_rate": 2e-07, "loss": -0.0127, "step": 525 }, { "clip_ratio/high_max": 0.0015967537183314562, "clip_ratio/high_mean": 0.0006850626268715132, "clip_ratio/low_mean": 0.0005046642399975099, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011897268777829595, "epoch": 0.04909321060030683, "grad_norm": 0.12281105667352676, "learning_rate": 2e-07, "loss": 0.0048, "step": 526 }, { "clip_ratio/high_max": 0.0017729788814904168, "clip_ratio/high_mean": 0.0006798362901463406, "clip_ratio/low_mean": 0.0006540745143865934, "clip_ratio/low_min": 4.6668398681504186e-05, "clip_ratio/region_mean": 0.001333910804532934, "epoch": 0.049186543700307415, "grad_norm": 0.12828707695007324, "learning_rate": 2e-07, "loss": 0.0805, "step": 527 }, { "clip_ratio/high_max": 0.0015139818424358964, "clip_ratio/high_mean": 0.0006359000462907716, "clip_ratio/low_mean": 0.0005450275330076693, "clip_ratio/low_min": 2.8674121494987048e-05, "clip_ratio/region_mean": 0.001180927600216819, "epoch": 0.049279876800308, "grad_norm": 0.11533678323030472, "learning_rate": 2e-07, "loss": 0.0313, "step": 528 }, { "clip_ratio/high_max": 0.0017079887220461387, "clip_ratio/high_mean": 0.0006885365946800448, "clip_ratio/low_mean": 0.0005163939740668866, "clip_ratio/low_min": 2.235165311503806e-05, "clip_ratio/region_mean": 0.0012049305405525956, "epoch": 0.049373209900308586, "grad_norm": 0.12229184806346893, "learning_rate": 2e-07, "loss": 0.0058, "step": 529 }, { "clip_ratio/high_max": 0.001500551814388018, "clip_ratio/high_mean": 0.0005652915615428356, "clip_ratio/low_mean": 0.0006068389120628126, "clip_ratio/low_min": 4.8852284180611605e-05, "clip_ratio/region_mean": 0.0011721304690581746, "epoch": 0.049466543000309164, "grad_norm": 0.12109557539224625, "learning_rate": 2e-07, "loss": 0.0607, "step": 530 }, { "clip_ratio/high_max": 0.0016181935388885904, "clip_ratio/high_mean": 0.0006836248903709929, "clip_ratio/low_mean": 0.0005544928280869499, "clip_ratio/low_min": 1.6956049876171164e-05, "clip_ratio/region_mean": 0.0012381177257339004, "epoch": 0.04955987610030975, "grad_norm": 0.1258392482995987, "learning_rate": 2e-07, "loss": 0.0716, "step": 531 }, { "clip_ratio/high_max": 0.002019554107391741, "clip_ratio/high_mean": 0.0007489765057471232, "clip_ratio/low_mean": 0.0005585811013588682, "clip_ratio/low_min": 1.2324985618761275e-05, "clip_ratio/region_mean": 0.0013075576316623483, "epoch": 0.049653209200310335, "grad_norm": 0.11476071923971176, "learning_rate": 2e-07, "loss": -0.0045, "step": 532 }, { "clip_ratio/high_max": 0.0016391516983276233, "clip_ratio/high_mean": 0.0006915751600899966, "clip_ratio/low_mean": 0.0005424370228865882, "clip_ratio/low_min": 2.657572167663602e-05, "clip_ratio/region_mean": 0.0012340122048044577, "epoch": 0.04974654230031091, "grad_norm": 0.11577029526233673, "learning_rate": 2e-07, "loss": -0.0018, "step": 533 }, { "clip_ratio/high_max": 0.0018789951136568561, "clip_ratio/high_mean": 0.0007453808593709255, "clip_ratio/low_mean": 0.0006554177307407372, "clip_ratio/low_min": 2.5383565116499085e-05, "clip_ratio/region_mean": 0.0014007985810167156, "epoch": 0.0498398754003115, "grad_norm": 0.1314806491136551, "learning_rate": 2e-07, "loss": 0.0223, "step": 534 }, { "clip_ratio/high_max": 0.002133504436642397, "clip_ratio/high_mean": 0.0007812856729287887, "clip_ratio/low_mean": 0.0005383214374887757, "clip_ratio/low_min": 1.1285662367299665e-05, "clip_ratio/region_mean": 0.0013196071231504902, "epoch": 0.04993320850031208, "grad_norm": 0.11995318531990051, "learning_rate": 2e-07, "loss": 0.0001, "step": 535 }, { "clip_ratio/high_max": 0.0017285073772654869, "clip_ratio/high_mean": 0.0007229851980810054, "clip_ratio/low_mean": 0.000613003843682236, "clip_ratio/low_min": 2.2883815290697385e-05, "clip_ratio/region_mean": 0.001335989021754358, "epoch": 0.05002654160031267, "grad_norm": 0.12192007154226303, "learning_rate": 2e-07, "loss": 0.024, "step": 536 }, { "clip_ratio/high_max": 0.0019033665012102574, "clip_ratio/high_mean": 0.0007032163939584279, "clip_ratio/low_mean": 0.0005144267479408882, "clip_ratio/low_min": 1.3531067452277057e-05, "clip_ratio/region_mean": 0.0012176431227999274, "epoch": 0.05011987470031325, "grad_norm": 0.11885754764080048, "learning_rate": 2e-07, "loss": 0.0197, "step": 537 }, { "clip_ratio/high_max": 0.001647905221034307, "clip_ratio/high_mean": 0.0007719422883383231, "clip_ratio/low_mean": 0.000595572904785513, "clip_ratio/low_min": 3.1016971661301795e-05, "clip_ratio/region_mean": 0.0013675152040377725, "epoch": 0.05021320780031383, "grad_norm": 0.12711986899375916, "learning_rate": 2e-07, "loss": 0.0374, "step": 538 }, { "clip_ratio/high_max": 0.0018654368395800702, "clip_ratio/high_mean": 0.0007804810084053315, "clip_ratio/low_mean": 0.0006269362656894373, "clip_ratio/low_min": 1.0798203220474534e-05, "clip_ratio/region_mean": 0.0014074173013796099, "epoch": 0.05030654090031442, "grad_norm": 0.12740099430084229, "learning_rate": 2e-07, "loss": 0.0439, "step": 539 }, { "clip_ratio/high_max": 0.0016882554409676231, "clip_ratio/high_mean": 0.0006842413677077275, "clip_ratio/low_mean": 0.0004812871993635781, "clip_ratio/low_min": 3.226926401111996e-05, "clip_ratio/region_mean": 0.001165528577985242, "epoch": 0.050399874000315, "grad_norm": 0.13205616176128387, "learning_rate": 2e-07, "loss": 0.0016, "step": 540 }, { "clip_ratio/high_max": 0.0018944383846246637, "clip_ratio/high_mean": 0.0007249890186358243, "clip_ratio/low_mean": 0.0005709368660973269, "clip_ratio/low_min": 1.4272664884629194e-05, "clip_ratio/region_mean": 0.0012959258710907307, "epoch": 0.05049320710031558, "grad_norm": 0.12349730730056763, "learning_rate": 2e-07, "loss": 0.0517, "step": 541 }, { "clip_ratio/high_max": 0.0016102362715173513, "clip_ratio/high_mean": 0.000684143047692487, "clip_ratio/low_mean": 0.0005657093151967274, "clip_ratio/low_min": 5.466116499519558e-05, "clip_ratio/region_mean": 0.0012498523828980979, "epoch": 0.050586540200316166, "grad_norm": 0.12686222791671753, "learning_rate": 2e-07, "loss": 0.0378, "step": 542 }, { "clip_ratio/high_max": 0.0019019496394321322, "clip_ratio/high_mean": 0.0007148446602514014, "clip_ratio/low_mean": 0.0006074918919694028, "clip_ratio/low_min": 2.6286372758477228e-05, "clip_ratio/region_mean": 0.0013223365785961505, "epoch": 0.05067987330031675, "grad_norm": 0.13016310334205627, "learning_rate": 2e-07, "loss": -0.0042, "step": 543 }, { "clip_ratio/high_max": 0.0018126615032088012, "clip_ratio/high_mean": 0.0007476470436813543, "clip_ratio/low_mean": 0.0005630906052829232, "clip_ratio/low_min": 3.707310406753095e-05, "clip_ratio/region_mean": 0.0013107376398693305, "epoch": 0.05077320640031733, "grad_norm": 0.11862743645906448, "learning_rate": 2e-07, "loss": 0.0189, "step": 544 }, { "clip_ratio/high_max": 0.0016176494245883077, "clip_ratio/high_mean": 0.0007058940082060872, "clip_ratio/low_mean": 0.0004515109249041416, "clip_ratio/low_min": 5.132849582878407e-05, "clip_ratio/region_mean": 0.0011574049458431546, "epoch": 0.050866539500317914, "grad_norm": 0.1153007298707962, "learning_rate": 2e-07, "loss": -0.0393, "step": 545 }, { "clip_ratio/high_max": 0.001659826284594601, "clip_ratio/high_mean": 0.0006493270520877559, "clip_ratio/low_mean": 0.0005628662484014058, "clip_ratio/low_min": 1.3584003681899048e-05, "clip_ratio/region_mean": 0.0012121932923037093, "epoch": 0.0509598726003185, "grad_norm": 0.11391019076108932, "learning_rate": 2e-07, "loss": 0.0296, "step": 546 }, { "clip_ratio/high_max": 0.0019514466475811787, "clip_ratio/high_mean": 0.0007983858886291273, "clip_ratio/low_mean": 0.0005713520640711067, "clip_ratio/low_min": 2.3665277694817632e-05, "clip_ratio/region_mean": 0.0013697379836230539, "epoch": 0.051053205700319085, "grad_norm": 0.11803116649389267, "learning_rate": 2e-07, "loss": -0.0045, "step": 547 }, { "clip_ratio/high_max": 0.0017589134949957952, "clip_ratio/high_mean": 0.0006785076411688351, "clip_ratio/low_mean": 0.0005395087882789085, "clip_ratio/low_min": 2.339071943424642e-05, "clip_ratio/region_mean": 0.0012180164594610687, "epoch": 0.05114653880031966, "grad_norm": 0.11099693924188614, "learning_rate": 2e-07, "loss": 0.0513, "step": 548 }, { "clip_ratio/high_max": 0.0016770981237641536, "clip_ratio/high_mean": 0.0007816170564183267, "clip_ratio/low_mean": 0.0006341719736155937, "clip_ratio/low_min": 1.9083969164057635e-05, "clip_ratio/region_mean": 0.0014157890364003833, "epoch": 0.05123987190032025, "grad_norm": 0.12385471165180206, "learning_rate": 2e-07, "loss": 0.0413, "step": 549 }, { "clip_ratio/high_max": 0.001685597060713917, "clip_ratio/high_mean": 0.0006680236219835933, "clip_ratio/low_mean": 0.0005954735115665244, "clip_ratio/low_min": 3.962612299801549e-05, "clip_ratio/region_mean": 0.0012634971280931495, "epoch": 0.05133320500032083, "grad_norm": 0.1261381059885025, "learning_rate": 2e-07, "loss": 0.0389, "step": 550 }, { "clip_ratio/high_max": 0.0016241915363934822, "clip_ratio/high_mean": 0.0006705939649691572, "clip_ratio/low_mean": 0.0006389973968907725, "clip_ratio/low_min": 4.383204850455513e-05, "clip_ratio/region_mean": 0.0013095914073346648, "epoch": 0.05142653810032142, "grad_norm": 0.11627063155174255, "learning_rate": 2e-07, "loss": 0.01, "step": 551 }, { "clip_ratio/high_max": 0.001547732565086335, "clip_ratio/high_mean": 0.0006917518730915617, "clip_ratio/low_mean": 0.0005334458301149425, "clip_ratio/low_min": 1.0277914952894207e-05, "clip_ratio/region_mean": 0.0012251977095729671, "epoch": 0.051519871200322, "grad_norm": 0.12772083282470703, "learning_rate": 2e-07, "loss": 0.0272, "step": 552 }, { "clip_ratio/high_max": 0.001941527210874483, "clip_ratio/high_mean": 0.0007605968257848872, "clip_ratio/low_mean": 0.0005486251939146314, "clip_ratio/low_min": 4.6300952817546204e-05, "clip_ratio/region_mean": 0.001309222021518508, "epoch": 0.05161320430032258, "grad_norm": 0.11804698407649994, "learning_rate": 2e-07, "loss": -0.0103, "step": 553 }, { "clip_ratio/high_max": 0.002328872938960558, "clip_ratio/high_mean": 0.0008920800373743987, "clip_ratio/low_mean": 0.0005771943724539597, "clip_ratio/low_min": 4.11948522014427e-05, "clip_ratio/region_mean": 0.001469274426199263, "epoch": 0.05170653740032317, "grad_norm": 0.12142713367938995, "learning_rate": 2e-07, "loss": 0.0009, "step": 554 }, { "clip_ratio/high_max": 0.0018062348626699531, "clip_ratio/high_mean": 0.0007341348309637397, "clip_ratio/low_mean": 0.0005699974499293603, "clip_ratio/low_min": 4.115113461011788e-05, "clip_ratio/region_mean": 0.0013041322636127006, "epoch": 0.05179987050032375, "grad_norm": 0.12030376493930817, "learning_rate": 2e-07, "loss": 0.0273, "step": 555 }, { "clip_ratio/high_max": 0.001751141437125625, "clip_ratio/high_mean": 0.0006809760661781183, "clip_ratio/low_mean": 0.0006588429841940524, "clip_ratio/low_min": 3.0098226488917135e-05, "clip_ratio/region_mean": 0.001339819031272782, "epoch": 0.05189320360032433, "grad_norm": 0.12279978394508362, "learning_rate": 2e-07, "loss": 0.054, "step": 556 }, { "clip_ratio/high_max": 0.0018652261060196906, "clip_ratio/high_mean": 0.000785793181421468, "clip_ratio/low_mean": 0.0006476215894508641, "clip_ratio/low_min": 5.609282607110799e-05, "clip_ratio/region_mean": 0.0014334147635963745, "epoch": 0.051986536700324916, "grad_norm": 0.1273386925458908, "learning_rate": 2e-07, "loss": 0.0093, "step": 557 }, { "clip_ratio/high_max": 0.0017728582897689193, "clip_ratio/high_mean": 0.0007740818782622227, "clip_ratio/low_mean": 0.0005255322676021024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012996141485928092, "epoch": 0.0520798698003255, "grad_norm": 0.12609295547008514, "learning_rate": 2e-07, "loss": -0.0068, "step": 558 }, { "clip_ratio/high_max": 0.001737559650791809, "clip_ratio/high_mean": 0.0007005978313827654, "clip_ratio/low_mean": 0.0005938601543675759, "clip_ratio/low_min": 7.650905536138453e-05, "clip_ratio/region_mean": 0.0012944579902978148, "epoch": 0.05217320290032608, "grad_norm": 0.11379167437553406, "learning_rate": 2e-07, "loss": 0.0742, "step": 559 }, { "clip_ratio/high_max": 0.0019275162121630274, "clip_ratio/high_mean": 0.0007835304459149484, "clip_ratio/low_mean": 0.0005804659122077283, "clip_ratio/low_min": 3.123789429082535e-05, "clip_ratio/region_mean": 0.0013639963835885283, "epoch": 0.052266536000326665, "grad_norm": 0.12291675806045532, "learning_rate": 2e-07, "loss": -0.0034, "step": 560 }, { "clip_ratio/high_max": 0.0019921707862522453, "clip_ratio/high_mean": 0.0007690047405048972, "clip_ratio/low_mean": 0.0005607053590210853, "clip_ratio/low_min": 1.0079019375552889e-05, "clip_ratio/region_mean": 0.001329710092250025, "epoch": 0.05235986910032725, "grad_norm": 0.12805572152137756, "learning_rate": 2e-07, "loss": 0.035, "step": 561 }, { "clip_ratio/high_max": 0.0020256716925359797, "clip_ratio/high_mean": 0.0007080993491399568, "clip_ratio/low_mean": 0.000642150219391624, "clip_ratio/low_min": 9.197429972118698e-05, "clip_ratio/region_mean": 0.0013502495785360225, "epoch": 0.052453202200327835, "grad_norm": 0.12678885459899902, "learning_rate": 2e-07, "loss": 0.0345, "step": 562 }, { "clip_ratio/high_max": 0.0018501561826269608, "clip_ratio/high_mean": 0.0007906521295808489, "clip_ratio/low_mean": 0.0006130010578999645, "clip_ratio/low_min": 1.2789032552973367e-05, "clip_ratio/region_mean": 0.0014036531865713187, "epoch": 0.05254653530032841, "grad_norm": 0.12275071442127228, "learning_rate": 2e-07, "loss": 0.0255, "step": 563 }, { "clip_ratio/high_max": 0.001977594532945659, "clip_ratio/high_mean": 0.0007286468371603405, "clip_ratio/low_mean": 0.0005730532539018895, "clip_ratio/low_min": 1.5687750419601798e-05, "clip_ratio/region_mean": 0.0013017000492254738, "epoch": 0.052639868400329, "grad_norm": 0.3113510012626648, "learning_rate": 2e-07, "loss": 0.0266, "step": 564 }, { "clip_ratio/high_max": 0.0019253962746006437, "clip_ratio/high_mean": 0.0007782668708387064, "clip_ratio/low_mean": 0.0004541981934380601, "clip_ratio/low_min": 1.1832638847408816e-05, "clip_ratio/region_mean": 0.001232465041539399, "epoch": 0.052733201500329584, "grad_norm": 0.11122290045022964, "learning_rate": 2e-07, "loss": 0.0027, "step": 565 }, { "clip_ratio/high_max": 0.0019160892043146305, "clip_ratio/high_mean": 0.0007441970956278965, "clip_ratio/low_mean": 0.0005536141543416306, "clip_ratio/low_min": 4.994019036530517e-05, "clip_ratio/region_mean": 0.001297811235417612, "epoch": 0.05282653460033017, "grad_norm": 0.12345101684331894, "learning_rate": 2e-07, "loss": 0.029, "step": 566 }, { "clip_ratio/high_max": 0.001711590462946333, "clip_ratio/high_mean": 0.0007046318169159349, "clip_ratio/low_mean": 0.0005403607283369638, "clip_ratio/low_min": 2.1772672880615573e-05, "clip_ratio/region_mean": 0.0012449925488908775, "epoch": 0.05291986770033075, "grad_norm": 0.11366830766201019, "learning_rate": 2e-07, "loss": 0.033, "step": 567 }, { "clip_ratio/high_max": 0.0018906180885096546, "clip_ratio/high_mean": 0.0007381432924375986, "clip_ratio/low_mean": 0.0006687447166768834, "clip_ratio/low_min": 2.9912986065028235e-05, "clip_ratio/region_mean": 0.0014068880373088177, "epoch": 0.05301320080033133, "grad_norm": 0.11527183651924133, "learning_rate": 2e-07, "loss": 0.0396, "step": 568 }, { "clip_ratio/high_max": 0.002100765923387371, "clip_ratio/high_mean": 0.0008215222424041713, "clip_ratio/low_mean": 0.0007018314790911973, "clip_ratio/low_min": 7.120884220057633e-06, "clip_ratio/region_mean": 0.001523353723314358, "epoch": 0.05310653390033192, "grad_norm": 0.12579979002475739, "learning_rate": 2e-07, "loss": 0.0299, "step": 569 }, { "clip_ratio/high_max": 0.0018478761703590862, "clip_ratio/high_mean": 0.000703203606462921, "clip_ratio/low_mean": 0.0006114704010542482, "clip_ratio/low_min": 4.962616185366642e-05, "clip_ratio/region_mean": 0.0013146739911462646, "epoch": 0.053199867000332496, "grad_norm": 0.1293213814496994, "learning_rate": 2e-07, "loss": 0.0452, "step": 570 }, { "clip_ratio/high_max": 0.0018802962549671065, "clip_ratio/high_mean": 0.0007975716343935346, "clip_ratio/low_mean": 0.000529888029632275, "clip_ratio/low_min": 1.4272664884629194e-05, "clip_ratio/region_mean": 0.0013274596531118732, "epoch": 0.05329320010033308, "grad_norm": 0.12362080812454224, "learning_rate": 2e-07, "loss": 0.0125, "step": 571 }, { "clip_ratio/high_max": 0.0020141180029895622, "clip_ratio/high_mean": 0.0007748335974611109, "clip_ratio/low_mean": 0.0006557353281095857, "clip_ratio/low_min": 1.909253296616953e-05, "clip_ratio/region_mean": 0.0014305689328466542, "epoch": 0.053386533200333666, "grad_norm": 0.12238020449876785, "learning_rate": 2e-07, "loss": 0.0195, "step": 572 }, { "clip_ratio/high_max": 0.0019384488332434557, "clip_ratio/high_mean": 0.0007668384714634158, "clip_ratio/low_mean": 0.0006219646838871995, "clip_ratio/low_min": 3.567332350939978e-05, "clip_ratio/region_mean": 0.0013888031753594987, "epoch": 0.05347986630033425, "grad_norm": 0.11997241526842117, "learning_rate": 2e-07, "loss": 0.0426, "step": 573 }, { "clip_ratio/high_max": 0.0017065440297301393, "clip_ratio/high_mean": 0.0006765083517166204, "clip_ratio/low_mean": 0.0005677262388417148, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012442345978342928, "epoch": 0.05357319940033483, "grad_norm": 0.11231635510921478, "learning_rate": 2e-07, "loss": 0.0367, "step": 574 }, { "clip_ratio/high_max": 0.001864747053332394, "clip_ratio/high_mean": 0.0007696370557823684, "clip_ratio/low_mean": 0.0005909162719035521, "clip_ratio/low_min": 6.291837507887976e-05, "clip_ratio/region_mean": 0.001360553338599857, "epoch": 0.053666532500335415, "grad_norm": 0.1277708113193512, "learning_rate": 2e-07, "loss": 0.049, "step": 575 }, { "clip_ratio/high_max": 0.0015508401738770772, "clip_ratio/high_mean": 0.0006332397433652659, "clip_ratio/low_mean": 0.0005332851887942525, "clip_ratio/low_min": 4.6439768993877806e-05, "clip_ratio/region_mean": 0.001166524933069013, "epoch": 0.053759865600336, "grad_norm": 0.11868428438901901, "learning_rate": 2e-07, "loss": 0.0228, "step": 576 }, { "clip_ratio/high_max": 0.001946644792042207, "clip_ratio/high_mean": 0.0007400521008094074, "clip_ratio/low_mean": 0.0005837154021719471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013237675084383227, "epoch": 0.053853198700336585, "grad_norm": 0.13686397671699524, "learning_rate": 2e-07, "loss": 0.0301, "step": 577 }, { "clip_ratio/high_max": 0.0015608372632414103, "clip_ratio/high_mean": 0.0006582285877811955, "clip_ratio/low_mean": 0.0006902497334522195, "clip_ratio/low_min": 4.6427281631622463e-05, "clip_ratio/region_mean": 0.0013484783048625104, "epoch": 0.053946531800337164, "grad_norm": 0.13206499814987183, "learning_rate": 2e-07, "loss": 0.069, "step": 578 }, { "clip_ratio/high_max": 0.0020299409370636567, "clip_ratio/high_mean": 0.0007910716558399145, "clip_ratio/low_mean": 0.0006031796146999113, "clip_ratio/low_min": 7.095907039911253e-05, "clip_ratio/region_mean": 0.0013942512450739741, "epoch": 0.05403986490033775, "grad_norm": 0.12364768981933594, "learning_rate": 2e-07, "loss": 0.0306, "step": 579 }, { "clip_ratio/high_max": 0.002113464906869922, "clip_ratio/high_mean": 0.0008170472119672922, "clip_ratio/low_mean": 0.0005592982024609228, "clip_ratio/low_min": 6.888969437568448e-05, "clip_ratio/region_mean": 0.001376345411699731, "epoch": 0.054133198000338334, "grad_norm": 0.13236679136753082, "learning_rate": 2e-07, "loss": 0.0065, "step": 580 }, { "clip_ratio/high_max": 0.0018834772781701759, "clip_ratio/high_mean": 0.0006586236304428894, "clip_ratio/low_mean": 0.0006315196424111491, "clip_ratio/low_min": 2.9235992769827135e-05, "clip_ratio/region_mean": 0.0012901432673970703, "epoch": 0.05422653110033892, "grad_norm": 0.11762931942939758, "learning_rate": 2e-07, "loss": 0.0778, "step": 581 }, { "clip_ratio/high_max": 0.001979979882889893, "clip_ratio/high_mean": 0.0007834097814338747, "clip_ratio/low_mean": 0.0005905541183892637, "clip_ratio/low_min": 7.776623624522472e-05, "clip_ratio/region_mean": 0.0013739638743572868, "epoch": 0.0543198642003395, "grad_norm": 0.13116592168807983, "learning_rate": 2e-07, "loss": 0.0576, "step": 582 }, { "clip_ratio/high_max": 0.001894597789942054, "clip_ratio/high_mean": 0.0007478521602024557, "clip_ratio/low_mean": 0.0005253698182059452, "clip_ratio/low_min": 1.6646690710331313e-05, "clip_ratio/region_mean": 0.0012732219620374963, "epoch": 0.05441319730034008, "grad_norm": 0.12515588104724884, "learning_rate": 2e-07, "loss": 0.0151, "step": 583 }, { "clip_ratio/high_max": 0.001880470743344631, "clip_ratio/high_mean": 0.0007337635379371932, "clip_ratio/low_mean": 0.0006381958919519093, "clip_ratio/low_min": 9.437063181394478e-05, "clip_ratio/region_mean": 0.0013719593844143674, "epoch": 0.05450653040034067, "grad_norm": 0.18464471399784088, "learning_rate": 2e-07, "loss": 0.0784, "step": 584 }, { "clip_ratio/high_max": 0.002296121288964059, "clip_ratio/high_mean": 0.000849091944473912, "clip_ratio/low_mean": 0.0005523095387616195, "clip_ratio/low_min": 3.928477372028283e-05, "clip_ratio/region_mean": 0.001401401503244415, "epoch": 0.054599863500341246, "grad_norm": 0.13381274044513702, "learning_rate": 2e-07, "loss": -0.0017, "step": 585 }, { "clip_ratio/high_max": 0.0021165946454857476, "clip_ratio/high_mean": 0.0007287215194082819, "clip_ratio/low_mean": 0.0006423675531550543, "clip_ratio/low_min": 3.622138683567755e-05, "clip_ratio/region_mean": 0.0013710890889342409, "epoch": 0.05469319660034183, "grad_norm": 0.1129380315542221, "learning_rate": 2e-07, "loss": 0.0417, "step": 586 }, { "clip_ratio/high_max": 0.0021159143070690334, "clip_ratio/high_mean": 0.000821098079541116, "clip_ratio/low_mean": 0.0005949813203187659, "clip_ratio/low_min": 1.9906841316696955e-05, "clip_ratio/region_mean": 0.001416079368937062, "epoch": 0.054786529700342416, "grad_norm": 0.13925187289714813, "learning_rate": 2e-07, "loss": 0.0206, "step": 587 }, { "clip_ratio/high_max": 0.0016702330503903795, "clip_ratio/high_mean": 0.0007274825493368553, "clip_ratio/low_mean": 0.0007227761498143082, "clip_ratio/low_min": 6.622693763347343e-05, "clip_ratio/region_mean": 0.0014502586782327853, "epoch": 0.054879862800343, "grad_norm": 0.14801770448684692, "learning_rate": 2e-07, "loss": 0.0419, "step": 588 }, { "clip_ratio/high_max": 0.001920305279782042, "clip_ratio/high_mean": 0.0007233480228023836, "clip_ratio/low_mean": 0.0006516732264572056, "clip_ratio/low_min": 4.07285961046e-05, "clip_ratio/region_mean": 0.0013750212674494833, "epoch": 0.05497319590034358, "grad_norm": 0.1267736256122589, "learning_rate": 2e-07, "loss": 0.0766, "step": 589 }, { "clip_ratio/high_max": 0.002247139454993885, "clip_ratio/high_mean": 0.0008423924518865533, "clip_ratio/low_mean": 0.000602574042204651, "clip_ratio/low_min": 4.9910197958524805e-05, "clip_ratio/region_mean": 0.001444966510462109, "epoch": 0.055066529000344165, "grad_norm": 0.1370629519224167, "learning_rate": 2e-07, "loss": 0.0242, "step": 590 }, { "clip_ratio/high_max": 0.001926904900756199, "clip_ratio/high_mean": 0.0007684007177886087, "clip_ratio/low_mean": 0.0006107104454713408, "clip_ratio/low_min": 7.084257958922535e-05, "clip_ratio/region_mean": 0.0013791111705359071, "epoch": 0.05515986210034475, "grad_norm": 0.1302701234817505, "learning_rate": 2e-07, "loss": 0.0357, "step": 591 }, { "clip_ratio/high_max": 0.001458942253520945, "clip_ratio/high_mean": 0.0006639108632953139, "clip_ratio/low_mean": 0.0005597411009148345, "clip_ratio/low_min": 1.5107565559446812e-05, "clip_ratio/region_mean": 0.001223651986947516, "epoch": 0.055253195200345336, "grad_norm": 0.12280702590942383, "learning_rate": 2e-07, "loss": 0.0461, "step": 592 }, { "clip_ratio/high_max": 0.002324761400814168, "clip_ratio/high_mean": 0.000818258475192124, "clip_ratio/low_mean": 0.0005714063581763185, "clip_ratio/low_min": 7.952549276524223e-05, "clip_ratio/region_mean": 0.001389664837915916, "epoch": 0.055346528300345914, "grad_norm": 0.1272321194410324, "learning_rate": 2e-07, "loss": 0.0473, "step": 593 }, { "clip_ratio/high_max": 0.002146025733964052, "clip_ratio/high_mean": 0.0008538049933122238, "clip_ratio/low_mean": 0.0006849107248854125, "clip_ratio/low_min": 9.263393803848885e-05, "clip_ratio/region_mean": 0.001538715725473594, "epoch": 0.0554398614003465, "grad_norm": 0.13797812163829803, "learning_rate": 2e-07, "loss": 0.0251, "step": 594 }, { "clip_ratio/high_max": 0.0020570386623148806, "clip_ratio/high_mean": 0.000757381420044112, "clip_ratio/low_mean": 0.0005548200570046902, "clip_ratio/low_min": 1.9564877220545895e-05, "clip_ratio/region_mean": 0.001312201471591834, "epoch": 0.055533194500347084, "grad_norm": 0.12918128073215485, "learning_rate": 2e-07, "loss": 0.0191, "step": 595 }, { "clip_ratio/high_max": 0.001484809909015894, "clip_ratio/high_mean": 0.0006596066650672583, "clip_ratio/low_mean": 0.0006142302136140643, "clip_ratio/low_min": 6.065852994652232e-05, "clip_ratio/region_mean": 0.0012738368786813226, "epoch": 0.05562652760034766, "grad_norm": 0.1280878633260727, "learning_rate": 2e-07, "loss": 0.0129, "step": 596 }, { "clip_ratio/high_max": 0.0016571496526012197, "clip_ratio/high_mean": 0.0006969525056774728, "clip_ratio/low_mean": 0.0005855164017702918, "clip_ratio/low_min": 2.7480021344672423e-05, "clip_ratio/region_mean": 0.001282468918361701, "epoch": 0.05571986070034825, "grad_norm": 0.12600445747375488, "learning_rate": 2e-07, "loss": 0.026, "step": 597 }, { "clip_ratio/high_max": 0.0017546714225318283, "clip_ratio/high_mean": 0.0007153079295676434, "clip_ratio/low_mean": 0.0006462242308771238, "clip_ratio/low_min": 3.9743220440868754e-05, "clip_ratio/region_mean": 0.0013615321440738626, "epoch": 0.05581319380034883, "grad_norm": 0.12563100457191467, "learning_rate": 2e-07, "loss": 0.0247, "step": 598 }, { "clip_ratio/high_max": 0.001701915629382711, "clip_ratio/high_mean": 0.0008425342675764114, "clip_ratio/low_mean": 0.0005406459322330193, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013831802243657876, "epoch": 0.05590652690034942, "grad_norm": 0.11960609257221222, "learning_rate": 2e-07, "loss": -0.0095, "step": 599 }, { "clip_ratio/high_max": 0.0017502158334536944, "clip_ratio/high_mean": 0.0006924674089532346, "clip_ratio/low_mean": 0.0005254591114862706, "clip_ratio/low_min": 3.9098690649552736e-05, "clip_ratio/region_mean": 0.0012179265067970846, "epoch": 0.055999860000349996, "grad_norm": 0.12083174288272858, "learning_rate": 2e-07, "loss": 0.0446, "step": 600 }, { "clip_ratio/high_max": 0.0017851490556495264, "clip_ratio/high_mean": 0.0007400912472803611, "clip_ratio/low_mean": 0.0006195292617121595, "clip_ratio/low_min": 5.454041729535675e-05, "clip_ratio/region_mean": 0.0013596205062640365, "epoch": 0.05609319310035058, "grad_norm": 0.12879344820976257, "learning_rate": 2e-07, "loss": 0.0557, "step": 601 }, { "clip_ratio/high_max": 0.00202664191419899, "clip_ratio/high_mean": 0.0008406742454099003, "clip_ratio/low_mean": 0.0005972513536107726, "clip_ratio/low_min": 1.4888041732774582e-05, "clip_ratio/region_mean": 0.001437925617210567, "epoch": 0.05618652620035117, "grad_norm": 0.13380761444568634, "learning_rate": 2e-07, "loss": 0.018, "step": 602 }, { "clip_ratio/high_max": 0.0018286904160049744, "clip_ratio/high_mean": 0.0008291263493447332, "clip_ratio/low_mean": 0.000586018886679085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014151452414807864, "epoch": 0.05627985930035175, "grad_norm": 0.1243838295340538, "learning_rate": 2e-07, "loss": -0.0056, "step": 603 }, { "clip_ratio/high_max": 0.002021389314904809, "clip_ratio/high_mean": 0.000783167670306284, "clip_ratio/low_mean": 0.0006176439237606246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014008116158947814, "epoch": 0.05637319240035233, "grad_norm": 0.13788427412509918, "learning_rate": 2e-07, "loss": 0.0089, "step": 604 }, { "clip_ratio/high_max": 0.002049003989668563, "clip_ratio/high_mean": 0.0008164793744072085, "clip_ratio/low_mean": 0.0006003318676448544, "clip_ratio/low_min": 1.78271507138561e-05, "clip_ratio/region_mean": 0.0014168112757033668, "epoch": 0.056466525500352915, "grad_norm": 0.1401062160730362, "learning_rate": 2e-07, "loss": 0.0345, "step": 605 }, { "clip_ratio/high_max": 0.002008176270464901, "clip_ratio/high_mean": 0.0007043196419544984, "clip_ratio/low_mean": 0.000647723391011823, "clip_ratio/low_min": 1.3793864127364941e-05, "clip_ratio/region_mean": 0.0013520430475182366, "epoch": 0.0565598586003535, "grad_norm": 0.13014359772205353, "learning_rate": 2e-07, "loss": -0.0041, "step": 606 }, { "clip_ratio/high_max": 0.0017567013164807577, "clip_ratio/high_mean": 0.0007358157290582312, "clip_ratio/low_mean": 0.0005898358031117823, "clip_ratio/low_min": 5.9480782510945573e-05, "clip_ratio/region_mean": 0.001325651533989003, "epoch": 0.056653191700354086, "grad_norm": 0.1213982030749321, "learning_rate": 2e-07, "loss": 0.056, "step": 607 }, { "clip_ratio/high_max": 0.001961809586646268, "clip_ratio/high_mean": 0.0007007364365563262, "clip_ratio/low_mean": 0.000553050128473842, "clip_ratio/low_min": 4.1119532397715375e-05, "clip_ratio/region_mean": 0.0012537865732156206, "epoch": 0.056746524800354664, "grad_norm": 0.1423816680908203, "learning_rate": 2e-07, "loss": 0.0004, "step": 608 }, { "clip_ratio/high_max": 0.0020472863943723496, "clip_ratio/high_mean": 0.0007904565791250207, "clip_ratio/low_mean": 0.000658993001707131, "clip_ratio/low_min": 2.641157880134415e-05, "clip_ratio/region_mean": 0.0014494495699182153, "epoch": 0.05683985790035525, "grad_norm": 0.12972383201122284, "learning_rate": 2e-07, "loss": 0.0263, "step": 609 }, { "clip_ratio/high_max": 0.00197420613039867, "clip_ratio/high_mean": 0.0007404360676446231, "clip_ratio/low_mean": 0.0007119301062630257, "clip_ratio/low_min": 5.044778299634345e-05, "clip_ratio/region_mean": 0.0014523661739076488, "epoch": 0.056933191000355834, "grad_norm": 0.12125332653522491, "learning_rate": 2e-07, "loss": 0.0261, "step": 610 }, { "clip_ratio/high_max": 0.0016314583299390506, "clip_ratio/high_mean": 0.0006676061620964902, "clip_ratio/low_mean": 0.000494060466735391, "clip_ratio/low_min": 3.808809742622543e-05, "clip_ratio/region_mean": 0.001161666619736934, "epoch": 0.05702652410035641, "grad_norm": 0.11677919328212738, "learning_rate": 2e-07, "loss": -0.0053, "step": 611 }, { "clip_ratio/high_max": 0.0019200336428184528, "clip_ratio/high_mean": 0.0008373005730391014, "clip_ratio/low_mean": 0.0005993667964503402, "clip_ratio/low_min": 5.141816745890537e-05, "clip_ratio/region_mean": 0.0014366673422046006, "epoch": 0.057119857200357, "grad_norm": 0.1325366199016571, "learning_rate": 2e-07, "loss": 0.0112, "step": 612 }, { "clip_ratio/high_max": 0.0022988273776718415, "clip_ratio/high_mean": 0.0008544237734895432, "clip_ratio/low_mean": 0.0006186852151586208, "clip_ratio/low_min": 1.9818200598820113e-05, "clip_ratio/region_mean": 0.0014731090086570475, "epoch": 0.05721319030035758, "grad_norm": 0.13329392671585083, "learning_rate": 2e-07, "loss": 0.0079, "step": 613 }, { "clip_ratio/high_max": 0.001607274854904972, "clip_ratio/high_mean": 0.0006836909906269284, "clip_ratio/low_mean": 0.0005559453538808157, "clip_ratio/low_min": 4.1066603444051e-05, "clip_ratio/region_mean": 0.0012396363636071328, "epoch": 0.05730652340035817, "grad_norm": 0.12452827394008636, "learning_rate": 2e-07, "loss": 0.0205, "step": 614 }, { "clip_ratio/high_max": 0.0019441236399870832, "clip_ratio/high_mean": 0.0008244753080361988, "clip_ratio/low_mean": 0.0005469589377753437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001371434216707712, "epoch": 0.05739985650035875, "grad_norm": 0.1297132670879364, "learning_rate": 2e-07, "loss": -0.027, "step": 615 }, { "clip_ratio/high_max": 0.0019316154903208371, "clip_ratio/high_mean": 0.0007169001255533658, "clip_ratio/low_mean": 0.0006500572326331167, "clip_ratio/low_min": 3.724763382706442e-05, "clip_ratio/region_mean": 0.0013669573127117474, "epoch": 0.05749318960035933, "grad_norm": 0.13498789072036743, "learning_rate": 2e-07, "loss": 0.0422, "step": 616 }, { "clip_ratio/high_max": 0.0017218805551237892, "clip_ratio/high_mean": 0.0006264360190471052, "clip_ratio/low_mean": 0.0006759300686098868, "clip_ratio/low_min": 4.004026050097309e-05, "clip_ratio/region_mean": 0.001302366072195582, "epoch": 0.05758652270035992, "grad_norm": 0.12367643415927887, "learning_rate": 2e-07, "loss": 0.0372, "step": 617 }, { "clip_ratio/high_max": 0.0018927859782706946, "clip_ratio/high_mean": 0.0007719259720033733, "clip_ratio/low_mean": 0.0006304375892796088, "clip_ratio/low_min": 6.857335120002972e-05, "clip_ratio/region_mean": 0.0014023635449120775, "epoch": 0.0576798558003605, "grad_norm": 0.20969240367412567, "learning_rate": 2e-07, "loss": 0.0395, "step": 618 }, { "clip_ratio/high_max": 0.001919278882269282, "clip_ratio/high_mean": 0.000784878313424997, "clip_ratio/low_mean": 0.0005481541775225196, "clip_ratio/low_min": 4.427554631547537e-05, "clip_ratio/region_mean": 0.0013330324582057074, "epoch": 0.05777318890036108, "grad_norm": 0.12936751544475555, "learning_rate": 2e-07, "loss": -0.0004, "step": 619 }, { "clip_ratio/high_max": 0.001853934198152274, "clip_ratio/high_mean": 0.000756432087655412, "clip_ratio/low_mean": 0.0005492968357430073, "clip_ratio/low_min": 5.417522515926976e-05, "clip_ratio/region_mean": 0.001305728899751557, "epoch": 0.057866522000361666, "grad_norm": 0.11905515938997269, "learning_rate": 2e-07, "loss": 0.0187, "step": 620 }, { "clip_ratio/high_max": 0.0016429902607342228, "clip_ratio/high_mean": 0.0006806379860790912, "clip_ratio/low_mean": 0.0006305058595899027, "clip_ratio/low_min": 8.865139261615695e-05, "clip_ratio/region_mean": 0.0013111438165651634, "epoch": 0.05795985510036225, "grad_norm": 0.14776545763015747, "learning_rate": 2e-07, "loss": 0.0198, "step": 621 }, { "clip_ratio/high_max": 0.0019432760600466281, "clip_ratio/high_mean": 0.0007690458769502584, "clip_ratio/low_mean": 0.0006771973912691465, "clip_ratio/low_min": 7.531232040491886e-05, "clip_ratio/region_mean": 0.001446243255486479, "epoch": 0.05805318820036283, "grad_norm": 0.11562264710664749, "learning_rate": 2e-07, "loss": 0.0229, "step": 622 }, { "clip_ratio/high_max": 0.001964599148777779, "clip_ratio/high_mean": 0.0007723728667770047, "clip_ratio/low_mean": 0.0005966082489976543, "clip_ratio/low_min": 0.00010300356461812044, "clip_ratio/region_mean": 0.0013689811130461749, "epoch": 0.058146521300363414, "grad_norm": 0.13284693658351898, "learning_rate": 2e-07, "loss": 0.0567, "step": 623 }, { "clip_ratio/high_max": 0.0020104213690501638, "clip_ratio/high_mean": 0.0008271292572317179, "clip_ratio/low_mean": 0.0006113947401900077, "clip_ratio/low_min": 2.2827407519798726e-05, "clip_ratio/region_mean": 0.001438524017430609, "epoch": 0.058239854400364, "grad_norm": 0.13532742857933044, "learning_rate": 2e-07, "loss": 0.0416, "step": 624 }, { "clip_ratio/high_max": 0.0020523898565443233, "clip_ratio/high_mean": 0.0007984798667166615, "clip_ratio/low_mean": 0.0006914174209669, "clip_ratio/low_min": 4.993927313989843e-05, "clip_ratio/region_mean": 0.0014898972876835614, "epoch": 0.058333187500364585, "grad_norm": 0.13844189047813416, "learning_rate": 2e-07, "loss": 0.0361, "step": 625 }, { "clip_ratio/high_max": 0.0023537332672276534, "clip_ratio/high_mean": 0.0008083535030891653, "clip_ratio/low_mean": 0.000564597527954902, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001372951013763668, "epoch": 0.05842652060036516, "grad_norm": 0.9572961926460266, "learning_rate": 2e-07, "loss": 0.049, "step": 626 }, { "clip_ratio/high_max": 0.0019701937708305195, "clip_ratio/high_mean": 0.0007563625840703025, "clip_ratio/low_mean": 0.0006133209008112317, "clip_ratio/low_min": 4.470934345590649e-05, "clip_ratio/region_mean": 0.0013696834648726508, "epoch": 0.05851985370036575, "grad_norm": 0.13144297897815704, "learning_rate": 2e-07, "loss": 0.0306, "step": 627 }, { "clip_ratio/high_max": 0.002237796004919801, "clip_ratio/high_mean": 0.000969190637988504, "clip_ratio/low_mean": 0.0005686430158675648, "clip_ratio/low_min": 2.095059880957706e-05, "clip_ratio/region_mean": 0.001537833668407984, "epoch": 0.05861318680036633, "grad_norm": 0.1319931000471115, "learning_rate": 2e-07, "loss": -0.052, "step": 628 }, { "clip_ratio/high_max": 0.002166531510738423, "clip_ratio/high_mean": 0.0007745832235741545, "clip_ratio/low_mean": 0.0006726450701535214, "clip_ratio/low_min": 2.0113709979341365e-05, "clip_ratio/region_mean": 0.0014472282819042448, "epoch": 0.05870651990036692, "grad_norm": 0.12749607861042023, "learning_rate": 2e-07, "loss": -0.0006, "step": 629 }, { "clip_ratio/high_max": 0.0021250838217383716, "clip_ratio/high_mean": 0.0008440735055046389, "clip_ratio/low_mean": 0.0006307784060481936, "clip_ratio/low_min": 6.43731809759629e-05, "clip_ratio/region_mean": 0.00147485194975161, "epoch": 0.0587998530003675, "grad_norm": 0.12356182932853699, "learning_rate": 2e-07, "loss": -0.0004, "step": 630 }, { "clip_ratio/high_max": 0.0019235118888900615, "clip_ratio/high_mean": 0.0007689325975661632, "clip_ratio/low_mean": 0.0007173232024797471, "clip_ratio/low_min": 4.3901057324546855e-05, "clip_ratio/region_mean": 0.0014862557654851116, "epoch": 0.05889318610036808, "grad_norm": 0.13854312896728516, "learning_rate": 2e-07, "loss": 0.0306, "step": 631 }, { "clip_ratio/high_max": 0.0018317053436476272, "clip_ratio/high_mean": 0.0007822150928404881, "clip_ratio/low_mean": 0.0006304732542048441, "clip_ratio/low_min": 1.80792594619561e-05, "clip_ratio/region_mean": 0.0014126883797871415, "epoch": 0.05898651920036867, "grad_norm": 0.12596453726291656, "learning_rate": 2e-07, "loss": 0.0126, "step": 632 }, { "clip_ratio/high_max": 0.001970906312635634, "clip_ratio/high_mean": 0.0007788934872223763, "clip_ratio/low_mean": 0.0007234407094074413, "clip_ratio/low_min": 6.702151040371973e-05, "clip_ratio/region_mean": 0.0015023341911728494, "epoch": 0.05907985230036925, "grad_norm": 0.12849193811416626, "learning_rate": 2e-07, "loss": 0.0854, "step": 633 }, { "clip_ratio/high_max": 0.0018440183775965124, "clip_ratio/high_mean": 0.0007803206408425467, "clip_ratio/low_mean": 0.0006907630377099849, "clip_ratio/low_min": 1.703229281702079e-05, "clip_ratio/region_mean": 0.0014710836330777965, "epoch": 0.05917318540036983, "grad_norm": 0.1286572962999344, "learning_rate": 2e-07, "loss": 0.011, "step": 634 }, { "clip_ratio/high_max": 0.0019412162801017985, "clip_ratio/high_mean": 0.0007511263193009654, "clip_ratio/low_mean": 0.000684510347127798, "clip_ratio/low_min": 4.662322589865653e-05, "clip_ratio/region_mean": 0.0014356366882566363, "epoch": 0.059266518500370416, "grad_norm": 0.13404014706611633, "learning_rate": 2e-07, "loss": 0.0323, "step": 635 }, { "clip_ratio/high_max": 0.0015266533373505808, "clip_ratio/high_mean": 0.0006573283280886244, "clip_ratio/low_mean": 0.0007011635007074801, "clip_ratio/low_min": 2.815088282659417e-05, "clip_ratio/region_mean": 0.0013584918160631787, "epoch": 0.059359851600371, "grad_norm": 0.11963405460119247, "learning_rate": 2e-07, "loss": 0.0402, "step": 636 }, { "clip_ratio/high_max": 0.002044441967882449, "clip_ratio/high_mean": 0.0008332400466315448, "clip_ratio/low_mean": 0.0006260903501242865, "clip_ratio/low_min": 3.764297889574664e-05, "clip_ratio/region_mean": 0.001459330422221683, "epoch": 0.05945318470037158, "grad_norm": 0.1403578370809555, "learning_rate": 2e-07, "loss": 0.0323, "step": 637 }, { "clip_ratio/high_max": 0.001747186659486033, "clip_ratio/high_mean": 0.0007683554540562909, "clip_ratio/low_mean": 0.0007259731373778777, "clip_ratio/low_min": 9.530715442451765e-05, "clip_ratio/region_mean": 0.0014943286187190097, "epoch": 0.059546517800372165, "grad_norm": 0.15727828443050385, "learning_rate": 2e-07, "loss": 0.0381, "step": 638 }, { "clip_ratio/high_max": 0.0020102968665014487, "clip_ratio/high_mean": 0.0007979989732120885, "clip_ratio/low_mean": 0.0005875053593626944, "clip_ratio/low_min": 1.1825922229036223e-05, "clip_ratio/region_mean": 0.001385504350764677, "epoch": 0.05963985090037275, "grad_norm": 0.12717203795909882, "learning_rate": 2e-07, "loss": 0.0188, "step": 639 }, { "clip_ratio/high_max": 0.0016796312702354044, "clip_ratio/high_mean": 0.0006957236291782465, "clip_ratio/low_mean": 0.0006574747694685357, "clip_ratio/low_min": 8.163578058884013e-05, "clip_ratio/region_mean": 0.0013531984041037504, "epoch": 0.059733184000373335, "grad_norm": 0.11941295862197876, "learning_rate": 2e-07, "loss": 0.0223, "step": 640 }, { "clip_ratio/high_max": 0.0017423394383513369, "clip_ratio/high_mean": 0.0007042491397442063, "clip_ratio/low_mean": 0.000573540504774428, "clip_ratio/low_min": 4.313974295655498e-05, "clip_ratio/region_mean": 0.001277789669984486, "completions/clipped_ratio": 0.016802106584821397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 657.5347900390625, "completions/mean_terminated_length": 598.7739868164062, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.05982651710037391, "grad_norm": 0.13230295479297638, "learning_rate": 2e-07, "loss": 0.0184, "num_tokens": 502405925.0, "reward": 0.5842459797859192, "reward_std": 0.19038790464401245, "rewards/simpleverify_reward/mean": 0.5842459797859192, "rewards/simpleverify_reward/std": 0.49285364151000977, "step": 641 }, { "clip_ratio/high_max": 0.0015028251145849936, "clip_ratio/high_mean": 0.0006032494839018909, "clip_ratio/low_mean": 0.0006449195443565259, "clip_ratio/low_min": 4.8747007895144634e-05, "clip_ratio/region_mean": 0.0012481690100685228, "epoch": 0.0599198502003745, "grad_norm": 0.13166922330856323, "learning_rate": 2e-07, "loss": 0.0487, "step": 642 }, { "clip_ratio/high_max": 0.001954842133272905, "clip_ratio/high_mean": 0.0007006585929048015, "clip_ratio/low_mean": 0.00046967056550784037, "clip_ratio/low_min": 1.3560425031755585e-05, "clip_ratio/region_mean": 0.0011703291711455677, "epoch": 0.060013183300375084, "grad_norm": 0.1293753981590271, "learning_rate": 2e-07, "loss": 0.0022, "step": 643 }, { "clip_ratio/high_max": 0.0019306345930090174, "clip_ratio/high_mean": 0.000710837999577052, "clip_ratio/low_mean": 0.00047347288273158483, "clip_ratio/low_min": 6.865114301035646e-06, "clip_ratio/region_mean": 0.0011843108586617745, "epoch": 0.06010651640037567, "grad_norm": 0.12771207094192505, "learning_rate": 2e-07, "loss": -0.0067, "step": 644 }, { "clip_ratio/high_max": 0.001788378882338293, "clip_ratio/high_mean": 0.0007058359806251246, "clip_ratio/low_mean": 0.0005115430540172383, "clip_ratio/low_min": 3.835004372376716e-05, "clip_ratio/region_mean": 0.0012173790128144901, "epoch": 0.06019984950037625, "grad_norm": 0.48159095644950867, "learning_rate": 2e-07, "loss": 0.0321, "step": 645 }, { "clip_ratio/high_max": 0.0021168722305446863, "clip_ratio/high_mean": 0.0007490512161894003, "clip_ratio/low_mean": 0.0006361779996950645, "clip_ratio/low_min": 3.291979146524682e-05, "clip_ratio/region_mean": 0.0013852292540832423, "epoch": 0.06029318260037683, "grad_norm": 0.1200612485408783, "learning_rate": 2e-07, "loss": 0.0187, "step": 646 }, { "clip_ratio/high_max": 0.0016230874316534027, "clip_ratio/high_mean": 0.0006015648868924472, "clip_ratio/low_mean": 0.0006159587674119393, "clip_ratio/low_min": 2.6342974706494715e-05, "clip_ratio/region_mean": 0.0012175236188340932, "epoch": 0.06038651570037742, "grad_norm": 0.11347278207540512, "learning_rate": 2e-07, "loss": 0.0471, "step": 647 }, { "clip_ratio/high_max": 0.0018738950711849611, "clip_ratio/high_mean": 0.0007123055756892427, "clip_ratio/low_mean": 0.0005526820277736988, "clip_ratio/low_min": 1.7337031749775633e-05, "clip_ratio/region_mean": 0.0012649875934584998, "epoch": 0.060479848800377996, "grad_norm": 0.15207113325595856, "learning_rate": 2e-07, "loss": 0.0579, "step": 648 }, { "clip_ratio/high_max": 0.0016506957617821172, "clip_ratio/high_mean": 0.0006782404234400019, "clip_ratio/low_mean": 0.0005686398226316669, "clip_ratio/low_min": 2.8563290470629e-05, "clip_ratio/region_mean": 0.001246880266990047, "epoch": 0.06057318190037858, "grad_norm": 0.12372089922428131, "learning_rate": 2e-07, "loss": 0.0521, "step": 649 }, { "clip_ratio/high_max": 0.001653977349633351, "clip_ratio/high_mean": 0.0007160591285355622, "clip_ratio/low_mean": 0.0005693523125955835, "clip_ratio/low_min": 1.0982253115798812e-05, "clip_ratio/region_mean": 0.0012854114393121563, "epoch": 0.060666515000379166, "grad_norm": 0.12325287610292435, "learning_rate": 2e-07, "loss": 0.0323, "step": 650 }, { "clip_ratio/high_max": 0.002118610129400622, "clip_ratio/high_mean": 0.0008132473249133909, "clip_ratio/low_mean": 0.0005348450949895778, "clip_ratio/low_min": 1.1079595424234867e-05, "clip_ratio/region_mean": 0.0013480924390023574, "epoch": 0.06075984810037975, "grad_norm": 0.12609155476093292, "learning_rate": 2e-07, "loss": -0.0298, "step": 651 }, { "clip_ratio/high_max": 0.0018145137692044955, "clip_ratio/high_mean": 0.0006364323307934683, "clip_ratio/low_mean": 0.0005615529062197311, "clip_ratio/low_min": 2.166505146306008e-05, "clip_ratio/region_mean": 0.001197985217004316, "epoch": 0.06085318120038033, "grad_norm": 0.13217772543430328, "learning_rate": 2e-07, "loss": 0.062, "step": 652 }, { "clip_ratio/high_max": 0.001685916467977222, "clip_ratio/high_mean": 0.0006090159859013511, "clip_ratio/low_mean": 0.0006306676805252209, "clip_ratio/low_min": 5.871406028745696e-05, "clip_ratio/region_mean": 0.0012396836245898157, "epoch": 0.060946514300380915, "grad_norm": 0.12306054681539536, "learning_rate": 2e-07, "loss": 0.0656, "step": 653 }, { "clip_ratio/high_max": 0.0016424743953393772, "clip_ratio/high_mean": 0.0006845913958386518, "clip_ratio/low_mean": 0.0005167244721633324, "clip_ratio/low_min": 2.889035204134416e-05, "clip_ratio/region_mean": 0.001201315870275721, "epoch": 0.0610398474003815, "grad_norm": 0.11775021255016327, "learning_rate": 2e-07, "loss": 0.0117, "step": 654 }, { "clip_ratio/high_max": 0.0020380984351504594, "clip_ratio/high_mean": 0.0007839371555746766, "clip_ratio/low_mean": 0.0006122116101323627, "clip_ratio/low_min": 5.395538300945191e-05, "clip_ratio/region_mean": 0.0013961487748019863, "epoch": 0.061133180500382085, "grad_norm": 0.14710205793380737, "learning_rate": 2e-07, "loss": 0.0476, "step": 655 }, { "clip_ratio/high_max": 0.0017588314403838012, "clip_ratio/high_mean": 0.0007149065477278782, "clip_ratio/low_mean": 0.000559953332412988, "clip_ratio/low_min": 8.326056558871642e-05, "clip_ratio/region_mean": 0.0012748598455800675, "epoch": 0.061226513600382663, "grad_norm": 0.12796972692012787, "learning_rate": 2e-07, "loss": 0.0619, "step": 656 }, { "clip_ratio/high_max": 0.0014832530505373143, "clip_ratio/high_mean": 0.0006230037251953036, "clip_ratio/low_mean": 0.0005755038291681558, "clip_ratio/low_min": 8.719855759409256e-05, "clip_ratio/region_mean": 0.0011985075871052686, "epoch": 0.06131984670038325, "grad_norm": 0.11228270828723907, "learning_rate": 2e-07, "loss": 0.036, "step": 657 }, { "clip_ratio/high_max": 0.0018529851477069315, "clip_ratio/high_mean": 0.0006681571121589513, "clip_ratio/low_mean": 0.0005821256563649513, "clip_ratio/low_min": 2.235556621599244e-05, "clip_ratio/region_mean": 0.0012502827557909768, "epoch": 0.061413179800383834, "grad_norm": 52775.59765625, "learning_rate": 2e-07, "loss": 2.0578, "step": 658 }, { "clip_ratio/high_max": 0.0015632145241397666, "clip_ratio/high_mean": 0.0006187711987877265, "clip_ratio/low_mean": 0.0005859623215656029, "clip_ratio/low_min": 3.3528843232488725e-05, "clip_ratio/region_mean": 0.0012047335112583824, "epoch": 0.06150651290038442, "grad_norm": 0.12432112544775009, "learning_rate": 2e-07, "loss": 0.0526, "step": 659 }, { "clip_ratio/high_max": 0.0014263695156842005, "clip_ratio/high_mean": 0.0006495874622487463, "clip_ratio/low_mean": 0.00048281699082508567, "clip_ratio/low_min": 4.417094805830857e-05, "clip_ratio/region_mean": 0.0011324044462526217, "epoch": 0.061599846000385, "grad_norm": 0.11659005284309387, "learning_rate": 2e-07, "loss": -0.0104, "step": 660 }, { "clip_ratio/high_max": 0.0016166997447726317, "clip_ratio/high_mean": 0.0007174753482104279, "clip_ratio/low_mean": 0.0005444566204459989, "clip_ratio/low_min": 3.9566070881846827e-05, "clip_ratio/region_mean": 0.001261931967746932, "epoch": 0.06169317910038558, "grad_norm": 0.11360806971788406, "learning_rate": 2e-07, "loss": 0.0204, "step": 661 }, { "clip_ratio/high_max": 0.0020287233346607536, "clip_ratio/high_mean": 0.0007829574824427254, "clip_ratio/low_mean": 0.0005669868078257423, "clip_ratio/low_min": 2.9407152396743186e-05, "clip_ratio/region_mean": 0.0013499443084583618, "epoch": 0.06178651220038617, "grad_norm": 0.11427122354507446, "learning_rate": 2e-07, "loss": 0.0314, "step": 662 }, { "clip_ratio/high_max": 0.0018698536368901841, "clip_ratio/high_mean": 0.0007680224589421414, "clip_ratio/low_mean": 0.000497693049510417, "clip_ratio/low_min": 1.540927041787654e-05, "clip_ratio/region_mean": 0.0012657155166380107, "epoch": 0.061879845300386746, "grad_norm": 0.12936750054359436, "learning_rate": 2e-07, "loss": 0.0193, "step": 663 }, { "clip_ratio/high_max": 0.002091225585900247, "clip_ratio/high_mean": 0.0007886120911280159, "clip_ratio/low_mean": 0.0004546863688119629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012432984440238215, "epoch": 0.06197317840038733, "grad_norm": 0.10685745626688004, "learning_rate": 2e-07, "loss": -0.0091, "step": 664 }, { "clip_ratio/high_max": 0.0016517172953172121, "clip_ratio/high_mean": 0.0006277057273109676, "clip_ratio/low_mean": 0.0005898472163607948, "clip_ratio/low_min": 7.325894785026321e-05, "clip_ratio/region_mean": 0.001217552930029342, "epoch": 0.062066511500387916, "grad_norm": 0.12509962916374207, "learning_rate": 2e-07, "loss": 0.0568, "step": 665 }, { "clip_ratio/high_max": 0.0017785399031708948, "clip_ratio/high_mean": 0.0006465633487096056, "clip_ratio/low_mean": 0.00047769745833647903, "clip_ratio/low_min": 2.0256680272723315e-05, "clip_ratio/region_mean": 0.0011242608015891165, "epoch": 0.0621598446003885, "grad_norm": 0.14455284178256989, "learning_rate": 2e-07, "loss": 0.0412, "step": 666 }, { "clip_ratio/high_max": 0.001821024139644578, "clip_ratio/high_mean": 0.0008029854543565307, "clip_ratio/low_mean": 0.0005656461144099012, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013686315724044107, "epoch": 0.06225317770038908, "grad_norm": 0.12351208925247192, "learning_rate": 2e-07, "loss": -0.0174, "step": 667 }, { "clip_ratio/high_max": 0.0019680398763739504, "clip_ratio/high_mean": 0.0008126700558932498, "clip_ratio/low_mean": 0.0005225878221608582, "clip_ratio/low_min": 1.5672016161261126e-05, "clip_ratio/region_mean": 0.0013352578826015815, "epoch": 0.062346510800389665, "grad_norm": 0.13760676980018616, "learning_rate": 2e-07, "loss": 0.0136, "step": 668 }, { "clip_ratio/high_max": 0.001881914162368048, "clip_ratio/high_mean": 0.0007741589997749543, "clip_ratio/low_mean": 0.0005514615650099586, "clip_ratio/low_min": 7.84289113653358e-06, "clip_ratio/region_mean": 0.0013256205784273334, "epoch": 0.06243984390039025, "grad_norm": 0.13342776894569397, "learning_rate": 2e-07, "loss": 0.02, "step": 669 }, { "clip_ratio/high_max": 0.001754728626110591, "clip_ratio/high_mean": 0.0006540088870679028, "clip_ratio/low_mean": 0.0006097197365306783, "clip_ratio/low_min": 4.9522039262228645e-05, "clip_ratio/region_mean": 0.00126372862723656, "epoch": 0.06253317700039084, "grad_norm": 0.1284882128238678, "learning_rate": 2e-07, "loss": 0.0101, "step": 670 }, { "clip_ratio/high_max": 0.0019851217948598787, "clip_ratio/high_mean": 0.000803604707471095, "clip_ratio/low_mean": 0.0005087060053483583, "clip_ratio/low_min": 2.229356141469907e-05, "clip_ratio/region_mean": 0.0013123106655257288, "epoch": 0.06262651010039141, "grad_norm": 0.14094722270965576, "learning_rate": 2e-07, "loss": -0.0074, "step": 671 }, { "clip_ratio/high_max": 0.0016339335852535442, "clip_ratio/high_mean": 0.0007240754148369888, "clip_ratio/low_mean": 0.0005846838666911935, "clip_ratio/low_min": 3.610417934396537e-05, "clip_ratio/region_mean": 0.001308759266976267, "epoch": 0.062719843200392, "grad_norm": 0.12171252816915512, "learning_rate": 2e-07, "loss": 0.0019, "step": 672 }, { "clip_ratio/high_max": 0.0016549150641367305, "clip_ratio/high_mean": 0.0006692086608381942, "clip_ratio/low_mean": 0.0006425303654395975, "clip_ratio/low_min": 4.715068916993914e-05, "clip_ratio/region_mean": 0.0013117390481056646, "epoch": 0.06281317630039258, "grad_norm": 0.11812646687030792, "learning_rate": 2e-07, "loss": 0.0799, "step": 673 }, { "clip_ratio/high_max": 0.0018441741267452016, "clip_ratio/high_mean": 0.0007506917554565007, "clip_ratio/low_mean": 0.0004898564029645058, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012405481575115118, "epoch": 0.06290650940039316, "grad_norm": 0.11893141269683838, "learning_rate": 2e-07, "loss": -0.0029, "step": 674 }, { "clip_ratio/high_max": 0.001666164167545503, "clip_ratio/high_mean": 0.000742921574783395, "clip_ratio/low_mean": 0.0006051517193554901, "clip_ratio/low_min": 7.174543225119123e-05, "clip_ratio/region_mean": 0.001348073285043938, "epoch": 0.06299984250039375, "grad_norm": 0.20682616531848907, "learning_rate": 2e-07, "loss": 0.0202, "step": 675 }, { "clip_ratio/high_max": 0.0018505779844417702, "clip_ratio/high_mean": 0.0006919997986187809, "clip_ratio/low_mean": 0.0006685145599476527, "clip_ratio/low_min": 3.8661462895106524e-05, "clip_ratio/region_mean": 0.0013605143976747058, "epoch": 0.06309317560039433, "grad_norm": 0.1127462238073349, "learning_rate": 2e-07, "loss": 0.0575, "step": 676 }, { "clip_ratio/high_max": 0.001542382062325487, "clip_ratio/high_mean": 0.0006228030470083468, "clip_ratio/low_mean": 0.0005404314233601326, "clip_ratio/low_min": 4.3086947698611766e-05, "clip_ratio/region_mean": 0.0011632344612735324, "epoch": 0.06318650870039491, "grad_norm": 0.13076160848140717, "learning_rate": 2e-07, "loss": 0.0045, "step": 677 }, { "clip_ratio/high_max": 0.0021443464283947833, "clip_ratio/high_mean": 0.0008728145439818036, "clip_ratio/low_mean": 0.0005523702911887085, "clip_ratio/low_min": 3.79922021238599e-05, "clip_ratio/region_mean": 0.0014251848406274803, "epoch": 0.0632798418003955, "grad_norm": 0.1288592368364334, "learning_rate": 2e-07, "loss": -0.0118, "step": 678 }, { "clip_ratio/high_max": 0.0020120497902098577, "clip_ratio/high_mean": 0.0007102653780748369, "clip_ratio/low_mean": 0.000562560423531977, "clip_ratio/low_min": 4.000227272626944e-05, "clip_ratio/region_mean": 0.0012728257825074252, "epoch": 0.06337317490039608, "grad_norm": 0.1331200897693634, "learning_rate": 2e-07, "loss": 0.0519, "step": 679 }, { "clip_ratio/high_max": 0.0018288054161530454, "clip_ratio/high_mean": 0.0007199072488219826, "clip_ratio/low_mean": 0.0005980715468467679, "clip_ratio/low_min": 4.696734413300874e-05, "clip_ratio/region_mean": 0.0013179787783883512, "epoch": 0.06346650800039666, "grad_norm": 0.12625592947006226, "learning_rate": 2e-07, "loss": 0.0412, "step": 680 }, { "clip_ratio/high_max": 0.0021106762360432185, "clip_ratio/high_mean": 0.0007806417006577249, "clip_ratio/low_mean": 0.0005319642650647438, "clip_ratio/low_min": 3.516345350362826e-05, "clip_ratio/region_mean": 0.0013126059420756064, "epoch": 0.06355984110039725, "grad_norm": 0.13658370077610016, "learning_rate": 2e-07, "loss": 0.0018, "step": 681 }, { "clip_ratio/high_max": 0.0015075959672685713, "clip_ratio/high_mean": 0.0006276505018831813, "clip_ratio/low_mean": 0.0006489465158665553, "clip_ratio/low_min": 3.836808809865033e-05, "clip_ratio/region_mean": 0.0012765970350301359, "epoch": 0.06365317420039783, "grad_norm": 0.13418620824813843, "learning_rate": 2e-07, "loss": 0.0537, "step": 682 }, { "clip_ratio/high_max": 0.0016228008389589377, "clip_ratio/high_mean": 0.0006652680986007908, "clip_ratio/low_mean": 0.0005927661004534457, "clip_ratio/low_min": 1.394466744386591e-05, "clip_ratio/region_mean": 0.0012580341863213107, "epoch": 0.06374650730039842, "grad_norm": 0.12442555278539658, "learning_rate": 2e-07, "loss": 0.0231, "step": 683 }, { "clip_ratio/high_max": 0.0018634249390743207, "clip_ratio/high_mean": 0.0006525746757688466, "clip_ratio/low_mean": 0.0005686860476998845, "clip_ratio/low_min": 1.0129660040547606e-05, "clip_ratio/region_mean": 0.0012212607252877206, "epoch": 0.063839840400399, "grad_norm": 0.12511055171489716, "learning_rate": 2e-07, "loss": 0.0395, "step": 684 }, { "clip_ratio/high_max": 0.0017387674888595939, "clip_ratio/high_mean": 0.0006673971001873724, "clip_ratio/low_mean": 0.0005974502801109338, "clip_ratio/low_min": 2.1564604139712173e-05, "clip_ratio/region_mean": 0.0012648473857552744, "epoch": 0.06393317350039958, "grad_norm": 0.12611344456672668, "learning_rate": 2e-07, "loss": 0.0707, "step": 685 }, { "clip_ratio/high_max": 0.0017114603106165305, "clip_ratio/high_mean": 0.0007179043768701376, "clip_ratio/low_mean": 0.0005226764351391466, "clip_ratio/low_min": 1.4891589671606198e-05, "clip_ratio/region_mean": 0.001240580815647263, "epoch": 0.06402650660040017, "grad_norm": 0.1261724978685379, "learning_rate": 2e-07, "loss": 0.005, "step": 686 }, { "clip_ratio/high_max": 0.001738532566378126, "clip_ratio/high_mean": 0.0006687697177767404, "clip_ratio/low_mean": 0.0005617700389848324, "clip_ratio/low_min": 2.4767730792518705e-05, "clip_ratio/region_mean": 0.0012305397758609615, "epoch": 0.06411983970040075, "grad_norm": 0.14246691763401031, "learning_rate": 2e-07, "loss": 0.0376, "step": 687 }, { "clip_ratio/high_max": 0.001610835715837311, "clip_ratio/high_mean": 0.0007581183181173401, "clip_ratio/low_mean": 0.0005493717580975499, "clip_ratio/low_min": 2.5688450477900915e-05, "clip_ratio/region_mean": 0.0013074900780338794, "epoch": 0.06421317280040133, "grad_norm": 0.11837201565504074, "learning_rate": 2e-07, "loss": 0.0017, "step": 688 }, { "clip_ratio/high_max": 0.0017353488874505274, "clip_ratio/high_mean": 0.000733640423277393, "clip_ratio/low_mean": 0.0005343306884242338, "clip_ratio/low_min": 6.563397755598999e-05, "clip_ratio/region_mean": 0.0012679711071541533, "epoch": 0.06430650590040192, "grad_norm": 0.12705519795417786, "learning_rate": 2e-07, "loss": -0.0146, "step": 689 }, { "clip_ratio/high_max": 0.001730902753479313, "clip_ratio/high_mean": 0.0006527771220135037, "clip_ratio/low_mean": 0.0006498619150079321, "clip_ratio/low_min": 7.652586282347329e-05, "clip_ratio/region_mean": 0.0013026390297454782, "epoch": 0.0643998390004025, "grad_norm": 0.14517691731452942, "learning_rate": 2e-07, "loss": 0.0511, "step": 690 }, { "clip_ratio/high_max": 0.001656091775657842, "clip_ratio/high_mean": 0.0006908269551786361, "clip_ratio/low_mean": 0.0005910973231948446, "clip_ratio/low_min": 3.2440868380945176e-05, "clip_ratio/region_mean": 0.0012819242947443854, "epoch": 0.06449317210040308, "grad_norm": 0.11404888331890106, "learning_rate": 2e-07, "loss": 0.0295, "step": 691 }, { "clip_ratio/high_max": 0.0016943153059401084, "clip_ratio/high_mean": 0.0007492045824619709, "clip_ratio/low_mean": 0.0006518374034385488, "clip_ratio/low_min": 0.00010651837510522455, "clip_ratio/region_mean": 0.0014010419581609312, "epoch": 0.06458650520040367, "grad_norm": 0.13084450364112854, "learning_rate": 2e-07, "loss": 0.0515, "step": 692 }, { "clip_ratio/high_max": 0.0013938709416834172, "clip_ratio/high_mean": 0.0005792533711428405, "clip_ratio/low_mean": 0.000583945203516123, "clip_ratio/low_min": 3.434501377341803e-05, "clip_ratio/region_mean": 0.0011631985726126004, "epoch": 0.06467983830040425, "grad_norm": 0.13028603792190552, "learning_rate": 2e-07, "loss": 0.0409, "step": 693 }, { "clip_ratio/high_max": 0.0020387881522765383, "clip_ratio/high_mean": 0.0006953164102014853, "clip_ratio/low_mean": 0.0005997585139994044, "clip_ratio/low_min": 2.0589081941579934e-05, "clip_ratio/region_mean": 0.0012950749405717943, "epoch": 0.06477317140040484, "grad_norm": 0.15035361051559448, "learning_rate": 2e-07, "loss": 0.0447, "step": 694 }, { "clip_ratio/high_max": 0.001968824606592534, "clip_ratio/high_mean": 0.0006885302464070264, "clip_ratio/low_mean": 0.0006388464607880451, "clip_ratio/low_min": 5.361286002880661e-05, "clip_ratio/region_mean": 0.0013273767217469867, "epoch": 0.06486650450040542, "grad_norm": 0.12211810052394867, "learning_rate": 2e-07, "loss": 0.0423, "step": 695 }, { "clip_ratio/high_max": 0.0018784432213578839, "clip_ratio/high_mean": 0.0007608671276102541, "clip_ratio/low_mean": 0.0005259094959910726, "clip_ratio/low_min": 6.476586167991627e-05, "clip_ratio/region_mean": 0.0012867766454291996, "epoch": 0.064959837600406, "grad_norm": 0.12226026505231857, "learning_rate": 2e-07, "loss": 0.0267, "step": 696 }, { "clip_ratio/high_max": 0.0018431671887810808, "clip_ratio/high_mean": 0.0007057022412482183, "clip_ratio/low_mean": 0.0005839042160005192, "clip_ratio/low_min": 5.816106386191677e-05, "clip_ratio/region_mean": 0.0012896064508822747, "epoch": 0.06505317070040659, "grad_norm": 0.12323998659849167, "learning_rate": 2e-07, "loss": -0.0041, "step": 697 }, { "clip_ratio/high_max": 0.00163398525910452, "clip_ratio/high_mean": 0.0007089340015227208, "clip_ratio/low_mean": 0.0006319398671621457, "clip_ratio/low_min": 8.120212078210898e-05, "clip_ratio/region_mean": 0.0013408738632278983, "epoch": 0.06514650380040717, "grad_norm": 0.149945467710495, "learning_rate": 2e-07, "loss": 0.0397, "step": 698 }, { "clip_ratio/high_max": 0.001924309523019474, "clip_ratio/high_mean": 0.0007362278865912231, "clip_ratio/low_mean": 0.0006118863766459981, "clip_ratio/low_min": 5.929132476012455e-05, "clip_ratio/region_mean": 0.0013481142668752, "epoch": 0.06523983690040774, "grad_norm": 0.13071078062057495, "learning_rate": 2e-07, "loss": 0.0419, "step": 699 }, { "clip_ratio/high_max": 0.0016351238264178392, "clip_ratio/high_mean": 0.0006699943733110558, "clip_ratio/low_mean": 0.0005900290379940998, "clip_ratio/low_min": 4.705017909145681e-05, "clip_ratio/region_mean": 0.0012600233785633463, "epoch": 0.06533317000040834, "grad_norm": 0.1272680014371872, "learning_rate": 2e-07, "loss": 0.0632, "step": 700 }, { "clip_ratio/high_max": 0.0018430365162203088, "clip_ratio/high_mean": 0.000714188628080592, "clip_ratio/low_mean": 0.0006304321350398823, "clip_ratio/low_min": 5.406531727203401e-05, "clip_ratio/region_mean": 0.0013446207849483471, "epoch": 0.06542650310040891, "grad_norm": 0.12821176648139954, "learning_rate": 2e-07, "loss": 0.0542, "step": 701 }, { "clip_ratio/high_max": 0.0017104860598919913, "clip_ratio/high_mean": 0.0007120731152099324, "clip_ratio/low_mean": 0.000566356209674268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012784293430740945, "epoch": 0.06551983620040949, "grad_norm": 0.12871773540973663, "learning_rate": 2e-07, "loss": -0.0011, "step": 702 }, { "clip_ratio/high_max": 0.0017650730878813192, "clip_ratio/high_mean": 0.0007162299425544916, "clip_ratio/low_mean": 0.00052325281285448, "clip_ratio/low_min": 1.2744698324240744e-05, "clip_ratio/region_mean": 0.0012394827026582789, "epoch": 0.06561316930041008, "grad_norm": 0.1262146383523941, "learning_rate": 2e-07, "loss": 0.008, "step": 703 }, { "clip_ratio/high_max": 0.0017712773187668063, "clip_ratio/high_mean": 0.0006911707832841785, "clip_ratio/low_mean": 0.0005888854225304385, "clip_ratio/low_min": 2.155915899493266e-05, "clip_ratio/region_mean": 0.0012800561744370498, "epoch": 0.06570650240041066, "grad_norm": 0.12825819849967957, "learning_rate": 2e-07, "loss": 0.0453, "step": 704 }, { "clip_ratio/high_max": 0.0018428367911837995, "clip_ratio/high_mean": 0.0006955708922760095, "clip_ratio/low_mean": 0.0005074369155408931, "clip_ratio/low_min": 5.127897748025134e-05, "clip_ratio/region_mean": 0.0012030078469251748, "epoch": 0.06579983550041126, "grad_norm": 0.13235251605510712, "learning_rate": 2e-07, "loss": 0.0508, "step": 705 }, { "clip_ratio/high_max": 0.0018140952415706124, "clip_ratio/high_mean": 0.0007767200331727508, "clip_ratio/low_mean": 0.00055802962378948, "clip_ratio/low_min": 1.4016595741850324e-05, "clip_ratio/region_mean": 0.0013347496387723368, "epoch": 0.06589316860041183, "grad_norm": 0.12588077783584595, "learning_rate": 2e-07, "loss": 0.0285, "step": 706 }, { "clip_ratio/high_max": 0.0020187348272884265, "clip_ratio/high_mean": 0.0007664247641514521, "clip_ratio/low_mean": 0.0005913617524129222, "clip_ratio/low_min": 2.1236834072624333e-05, "clip_ratio/region_mean": 0.0013577864847320598, "epoch": 0.06598650170041241, "grad_norm": 0.12371828407049179, "learning_rate": 2e-07, "loss": 0.0063, "step": 707 }, { "clip_ratio/high_max": 0.001911364270199556, "clip_ratio/high_mean": 0.0007227292880997993, "clip_ratio/low_mean": 0.0005666977522196248, "clip_ratio/low_min": 2.8496522645582445e-05, "clip_ratio/region_mean": 0.0012894270548713394, "epoch": 0.066079834800413, "grad_norm": 0.4781041443347931, "learning_rate": 2e-07, "loss": 0.0267, "step": 708 }, { "clip_ratio/high_max": 0.0016880758485058323, "clip_ratio/high_mean": 0.0006627570710406872, "clip_ratio/low_mean": 0.0005344714281818597, "clip_ratio/low_min": 2.2407637516153045e-05, "clip_ratio/region_mean": 0.0011972284883086104, "epoch": 0.06617316790041358, "grad_norm": 0.12130901962518692, "learning_rate": 2e-07, "loss": 0.0156, "step": 709 }, { "clip_ratio/high_max": 0.0017600160208530724, "clip_ratio/high_mean": 0.0006227733556443127, "clip_ratio/low_mean": 0.0006084926553739933, "clip_ratio/low_min": 1.196401262859581e-05, "clip_ratio/region_mean": 0.0012312660583120305, "epoch": 0.06626650100041416, "grad_norm": 0.14432811737060547, "learning_rate": 2e-07, "loss": 0.0349, "step": 710 }, { "clip_ratio/high_max": 0.001989369331568014, "clip_ratio/high_mean": 0.0007471108274330618, "clip_ratio/low_mean": 0.000570840547879925, "clip_ratio/low_min": 4.3036417991970666e-05, "clip_ratio/region_mean": 0.0013179513553041033, "epoch": 0.06635983410041475, "grad_norm": 0.1312975436449051, "learning_rate": 2e-07, "loss": -0.0153, "step": 711 }, { "clip_ratio/high_max": 0.0018217051874671597, "clip_ratio/high_mean": 0.0006972681658226065, "clip_ratio/low_mean": 0.0005065220320830122, "clip_ratio/low_min": 1.062383125827182e-05, "clip_ratio/region_mean": 0.0012037902197334915, "epoch": 0.06645316720041533, "grad_norm": 0.13196122646331787, "learning_rate": 2e-07, "loss": 0.0017, "step": 712 }, { "clip_ratio/high_max": 0.0014731160190422088, "clip_ratio/high_mean": 0.0006288127842708491, "clip_ratio/low_mean": 0.0006960149603401078, "clip_ratio/low_min": 7.496079433622072e-05, "clip_ratio/region_mean": 0.0013248277464299463, "epoch": 0.06654650030041591, "grad_norm": 0.16227781772613525, "learning_rate": 2e-07, "loss": 0.0752, "step": 713 }, { "clip_ratio/high_max": 0.001821120207750937, "clip_ratio/high_mean": 0.0007104326596163446, "clip_ratio/low_mean": 0.0005290385552143562, "clip_ratio/low_min": 4.164237361692358e-05, "clip_ratio/region_mean": 0.0012394712393870577, "epoch": 0.0666398334004165, "grad_norm": 0.11802949011325836, "learning_rate": 2e-07, "loss": 0.0259, "step": 714 }, { "clip_ratio/high_max": 0.0016361718080588616, "clip_ratio/high_mean": 0.0006976287877478171, "clip_ratio/low_mean": 0.0005578648951996001, "clip_ratio/low_min": 6.286701591307065e-05, "clip_ratio/region_mean": 0.0012554936984088272, "epoch": 0.06673316650041708, "grad_norm": 0.12575367093086243, "learning_rate": 2e-07, "loss": 0.01, "step": 715 }, { "clip_ratio/high_max": 0.0015306192872230895, "clip_ratio/high_mean": 0.0006829078611190198, "clip_ratio/low_mean": 0.0005652276231558062, "clip_ratio/low_min": 2.6371308194939047e-05, "clip_ratio/region_mean": 0.0012481355042837095, "epoch": 0.06682649960041767, "grad_norm": 0.12255307286977768, "learning_rate": 2e-07, "loss": 0.0426, "step": 716 }, { "clip_ratio/high_max": 0.0017676871648291126, "clip_ratio/high_mean": 0.0006988621153141139, "clip_ratio/low_mean": 0.000594995113715413, "clip_ratio/low_min": 3.6264593290979974e-05, "clip_ratio/region_mean": 0.0012938572035636753, "epoch": 0.06691983270041825, "grad_norm": 0.13913819193840027, "learning_rate": 2e-07, "loss": 0.011, "step": 717 }, { "clip_ratio/high_max": 0.0016347115742973983, "clip_ratio/high_mean": 0.0006760210362699581, "clip_ratio/low_mean": 0.0004997788200853392, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011757998508983292, "epoch": 0.06701316580041883, "grad_norm": 0.1278361827135086, "learning_rate": 2e-07, "loss": -0.006, "step": 718 }, { "clip_ratio/high_max": 0.0019817051470454317, "clip_ratio/high_mean": 0.0008013378246687353, "clip_ratio/low_mean": 0.0006236618937691674, "clip_ratio/low_min": 8.782880286162253e-05, "clip_ratio/region_mean": 0.001424999711161945, "epoch": 0.06710649890041942, "grad_norm": 0.1471550017595291, "learning_rate": 2e-07, "loss": 0.0135, "step": 719 }, { "clip_ratio/high_max": 0.0017789040539355483, "clip_ratio/high_mean": 0.0006382274532370502, "clip_ratio/low_mean": 0.000589313547607162, "clip_ratio/low_min": 2.098758523061406e-05, "clip_ratio/region_mean": 0.0012275410153961275, "epoch": 0.06719983200042, "grad_norm": 0.13642792403697968, "learning_rate": 2e-07, "loss": 0.0552, "step": 720 }, { "clip_ratio/high_max": 0.002038842605543323, "clip_ratio/high_mean": 0.0008691268412803765, "clip_ratio/low_mean": 0.0006305976894509513, "clip_ratio/low_min": 6.795874242015998e-05, "clip_ratio/region_mean": 0.0014997245161794126, "epoch": 0.06729316510042058, "grad_norm": 0.14861653745174408, "learning_rate": 2e-07, "loss": 0.0272, "step": 721 }, { "clip_ratio/high_max": 0.0020597925831680186, "clip_ratio/high_mean": 0.0008074911420408171, "clip_ratio/low_mean": 0.0005606143495242577, "clip_ratio/low_min": 8.225849342125002e-06, "clip_ratio/region_mean": 0.001368105451547308, "epoch": 0.06738649820042117, "grad_norm": 0.12199034541845322, "learning_rate": 2e-07, "loss": -0.0107, "step": 722 }, { "clip_ratio/high_max": 0.0017646953674557153, "clip_ratio/high_mean": 0.000686372845848382, "clip_ratio/low_mean": 0.0005738396848755656, "clip_ratio/low_min": 1.3830493116984144e-05, "clip_ratio/region_mean": 0.0012602125243574847, "epoch": 0.06747983130042175, "grad_norm": 0.13809935748577118, "learning_rate": 2e-07, "loss": 0.032, "step": 723 }, { "clip_ratio/high_max": 0.002041535968601238, "clip_ratio/high_mean": 0.0008074734596448252, "clip_ratio/low_mean": 0.0005946322085037536, "clip_ratio/low_min": 2.385093739576405e-05, "clip_ratio/region_mean": 0.0014021056776982732, "epoch": 0.06757316440042234, "grad_norm": 0.13424062728881836, "learning_rate": 2e-07, "loss": 0.002, "step": 724 }, { "clip_ratio/high_max": 0.0016238933603744954, "clip_ratio/high_mean": 0.0006851082889625104, "clip_ratio/low_mean": 0.0006468245919677429, "clip_ratio/low_min": 8.485943908453919e-05, "clip_ratio/region_mean": 0.0013319329264049884, "epoch": 0.06766649750042292, "grad_norm": 0.12736429274082184, "learning_rate": 2e-07, "loss": 0.0593, "step": 725 }, { "clip_ratio/high_max": 0.001720933745673392, "clip_ratio/high_mean": 0.0006456746932599344, "clip_ratio/low_mean": 0.0006141784378996817, "clip_ratio/low_min": 6.234898955881363e-05, "clip_ratio/region_mean": 0.0012598531138792168, "epoch": 0.0677598306004235, "grad_norm": 0.13584652543067932, "learning_rate": 2e-07, "loss": 0.0867, "step": 726 }, { "clip_ratio/high_max": 0.0020131024430156685, "clip_ratio/high_mean": 0.0008426957574556582, "clip_ratio/low_mean": 0.0005707887848984683, "clip_ratio/low_min": 4.9046240746974945e-05, "clip_ratio/region_mean": 0.0014134845769149251, "epoch": 0.06785316370042409, "grad_norm": 0.13958778977394104, "learning_rate": 2e-07, "loss": -0.0093, "step": 727 }, { "clip_ratio/high_max": 0.0016818938711367082, "clip_ratio/high_mean": 0.000640437532638316, "clip_ratio/low_mean": 0.000573838988202624, "clip_ratio/low_min": 5.9265707477607066e-05, "clip_ratio/region_mean": 0.0012142765044700354, "epoch": 0.06794649680042467, "grad_norm": 0.12061227113008499, "learning_rate": 2e-07, "loss": 0.015, "step": 728 }, { "clip_ratio/high_max": 0.0019786486045632046, "clip_ratio/high_mean": 0.0007174556922109332, "clip_ratio/low_mean": 0.0006398026271199342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013572583229688462, "epoch": 0.06803982990042524, "grad_norm": 0.1313309520483017, "learning_rate": 2e-07, "loss": 0.0345, "step": 729 }, { "clip_ratio/high_max": 0.0019008902963832952, "clip_ratio/high_mean": 0.000787861865319428, "clip_ratio/low_mean": 0.000585956115173758, "clip_ratio/low_min": 1.3196790860092733e-05, "clip_ratio/region_mean": 0.0013738179841311648, "epoch": 0.06813316300042584, "grad_norm": 0.14793342351913452, "learning_rate": 2e-07, "loss": -0.0028, "step": 730 }, { "clip_ratio/high_max": 0.00202564479332068, "clip_ratio/high_mean": 0.0007611770670337137, "clip_ratio/low_mean": 0.0006485042886197334, "clip_ratio/low_min": 5.51178136447561e-05, "clip_ratio/region_mean": 0.0014096813465585, "epoch": 0.06822649610042641, "grad_norm": 0.12728330492973328, "learning_rate": 2e-07, "loss": 0.0236, "step": 731 }, { "clip_ratio/high_max": 0.0019234822502767202, "clip_ratio/high_mean": 0.0008350364496436669, "clip_ratio/low_mean": 0.0005902909360884223, "clip_ratio/low_min": 8.934953257266898e-06, "clip_ratio/region_mean": 0.001425327380275121, "epoch": 0.06831982920042699, "grad_norm": 0.1414061337709427, "learning_rate": 2e-07, "loss": -0.0035, "step": 732 }, { "clip_ratio/high_max": 0.0014034231171535794, "clip_ratio/high_mean": 0.0006174422105686972, "clip_ratio/low_mean": 0.0007025871491350699, "clip_ratio/low_min": 4.7604851715732366e-05, "clip_ratio/region_mean": 0.0013200293942645658, "epoch": 0.06841316230042759, "grad_norm": 0.13389013707637787, "learning_rate": 2e-07, "loss": 0.0664, "step": 733 }, { "clip_ratio/high_max": 0.0015746317149023525, "clip_ratio/high_mean": 0.0006251774302654667, "clip_ratio/low_mean": 0.0006905611389811384, "clip_ratio/low_min": 6.208332160895225e-05, "clip_ratio/region_mean": 0.0013157385546946898, "epoch": 0.06850649540042816, "grad_norm": 0.14052222669124603, "learning_rate": 2e-07, "loss": 0.0632, "step": 734 }, { "clip_ratio/high_max": 0.0017099685937864706, "clip_ratio/high_mean": 0.0006722791913489345, "clip_ratio/low_mean": 0.0006368850608851062, "clip_ratio/low_min": 7.924424699012889e-05, "clip_ratio/region_mean": 0.001309164239501115, "epoch": 0.06859982850042876, "grad_norm": 0.12538683414459229, "learning_rate": 2e-07, "loss": 0.0256, "step": 735 }, { "clip_ratio/high_max": 0.0017369605229760054, "clip_ratio/high_mean": 0.0007407784160022857, "clip_ratio/low_mean": 0.0005957555713393958, "clip_ratio/low_min": 1.813960261642933e-05, "clip_ratio/region_mean": 0.001336533976427745, "epoch": 0.06869316160042933, "grad_norm": 0.1312275528907776, "learning_rate": 2e-07, "loss": 0.0153, "step": 736 }, { "clip_ratio/high_max": 0.0017714449350023642, "clip_ratio/high_mean": 0.000799140289927891, "clip_ratio/low_mean": 0.0006108619681981509, "clip_ratio/low_min": 2.4706812837393954e-05, "clip_ratio/region_mean": 0.0014100022617640207, "epoch": 0.06878649470042991, "grad_norm": 0.1298689991235733, "learning_rate": 2e-07, "loss": 0.0091, "step": 737 }, { "clip_ratio/high_max": 0.0018455002209520899, "clip_ratio/high_mean": 0.0008113680378301069, "clip_ratio/low_mean": 0.0006094640157243703, "clip_ratio/low_min": 4.715655268228147e-05, "clip_ratio/region_mean": 0.0014208320862962864, "epoch": 0.0688798278004305, "grad_norm": 0.14072157442569733, "learning_rate": 2e-07, "loss": 0.0135, "step": 738 }, { "clip_ratio/high_max": 0.0017453860637033358, "clip_ratio/high_mean": 0.0006908440591359977, "clip_ratio/low_mean": 0.0006311995175565244, "clip_ratio/low_min": 3.4417424103594385e-05, "clip_ratio/region_mean": 0.001322043601248879, "epoch": 0.06897316090043108, "grad_norm": 0.13882027566432953, "learning_rate": 2e-07, "loss": 0.0228, "step": 739 }, { "clip_ratio/high_max": 0.0016288513934341609, "clip_ratio/high_mean": 0.0006995230523898499, "clip_ratio/low_mean": 0.0005564735402003862, "clip_ratio/low_min": 3.338638362038182e-05, "clip_ratio/region_mean": 0.0012559966198750772, "epoch": 0.06906649400043166, "grad_norm": 0.13416001200675964, "learning_rate": 2e-07, "loss": -0.0091, "step": 740 }, { "clip_ratio/high_max": 0.0018814712093444541, "clip_ratio/high_mean": 0.0007645155583304586, "clip_ratio/low_mean": 0.0005276770343698445, "clip_ratio/low_min": 2.2476402591564693e-05, "clip_ratio/region_mean": 0.0012921925845148508, "epoch": 0.06915982710043225, "grad_norm": 0.13226832449436188, "learning_rate": 2e-07, "loss": -0.0145, "step": 741 }, { "clip_ratio/high_max": 0.00182180222691386, "clip_ratio/high_mean": 0.0007294012557395035, "clip_ratio/low_mean": 0.0005697359756595688, "clip_ratio/low_min": 2.4528826543246396e-05, "clip_ratio/region_mean": 0.0012991372532269452, "epoch": 0.06925316020043283, "grad_norm": 0.12990514934062958, "learning_rate": 2e-07, "loss": 0.0266, "step": 742 }, { "clip_ratio/high_max": 0.0017791721893445356, "clip_ratio/high_mean": 0.0006149688942969078, "clip_ratio/low_mean": 0.0005969194844510639, "clip_ratio/low_min": 7.071129130054032e-05, "clip_ratio/region_mean": 0.0012118883787479717, "epoch": 0.06934649330043341, "grad_norm": 0.14736603200435638, "learning_rate": 2e-07, "loss": 0.0594, "step": 743 }, { "clip_ratio/high_max": 0.0019202605362806935, "clip_ratio/high_mean": 0.0007219611052278196, "clip_ratio/low_mean": 0.0005373491130740149, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012593102328537498, "epoch": 0.069439826400434, "grad_norm": 0.13976521790027618, "learning_rate": 2e-07, "loss": 0.0286, "step": 744 }, { "clip_ratio/high_max": 0.0022700840345351025, "clip_ratio/high_mean": 0.0009299338234995957, "clip_ratio/low_mean": 0.0005541553782677511, "clip_ratio/low_min": 1.394466744386591e-05, "clip_ratio/region_mean": 0.0014840891817584634, "epoch": 0.06953315950043458, "grad_norm": 0.1326330453157425, "learning_rate": 2e-07, "loss": 0.012, "step": 745 }, { "clip_ratio/high_max": 0.0019038873942918144, "clip_ratio/high_mean": 0.0007835072738089366, "clip_ratio/low_mean": 0.0005991257921778015, "clip_ratio/low_min": 3.882667351717828e-05, "clip_ratio/region_mean": 0.0013826330578012858, "epoch": 0.06962649260043517, "grad_norm": 0.13354718685150146, "learning_rate": 2e-07, "loss": 0.0364, "step": 746 }, { "clip_ratio/high_max": 0.00188977029392845, "clip_ratio/high_mean": 0.0007578237382404041, "clip_ratio/low_mean": 0.0006561072568729287, "clip_ratio/low_min": 6.512451818707632e-06, "clip_ratio/region_mean": 0.001413931000570301, "epoch": 0.06971982570043575, "grad_norm": 0.14631761610507965, "learning_rate": 2e-07, "loss": 0.0255, "step": 747 }, { "clip_ratio/high_max": 0.0020120538902119733, "clip_ratio/high_mean": 0.0007912606961326674, "clip_ratio/low_mean": 0.0006326311649900163, "clip_ratio/low_min": 2.893444434448611e-05, "clip_ratio/region_mean": 0.001423891844751779, "epoch": 0.06981315880043633, "grad_norm": 0.12877687811851501, "learning_rate": 2e-07, "loss": 0.0365, "step": 748 }, { "clip_ratio/high_max": 0.0020629137798096053, "clip_ratio/high_mean": 0.000723976931112702, "clip_ratio/low_mean": 0.0005688386463589268, "clip_ratio/low_min": 1.9869094103341922e-05, "clip_ratio/region_mean": 0.0012928155774716288, "epoch": 0.06990649190043692, "grad_norm": 0.15203729271888733, "learning_rate": 2e-07, "loss": -0.029, "step": 749 }, { "clip_ratio/high_max": 0.0015529661468463019, "clip_ratio/high_mean": 0.0005806366698379861, "clip_ratio/low_mean": 0.0006421755033443333, "clip_ratio/low_min": 3.512858438625699e-05, "clip_ratio/region_mean": 0.0012228121631778777, "epoch": 0.0699998250004375, "grad_norm": 0.12965691089630127, "learning_rate": 2e-07, "loss": 0.0547, "step": 750 }, { "clip_ratio/high_max": 0.002022711021709256, "clip_ratio/high_mean": 0.0008650339932501083, "clip_ratio/low_mean": 0.000626831464614952, "clip_ratio/low_min": 5.443232021207223e-05, "clip_ratio/region_mean": 0.0014918654960638378, "epoch": 0.07009315810043808, "grad_norm": 0.15232199430465698, "learning_rate": 2e-07, "loss": 0.026, "step": 751 }, { "clip_ratio/high_max": 0.0019984413738711737, "clip_ratio/high_mean": 0.0008110612689051777, "clip_ratio/low_mean": 0.0006659698628936894, "clip_ratio/low_min": 2.1532819118874613e-05, "clip_ratio/region_mean": 0.0014770311427128036, "epoch": 0.07018649120043867, "grad_norm": 0.14948713779449463, "learning_rate": 2e-07, "loss": -0.0221, "step": 752 }, { "clip_ratio/high_max": 0.0017223160830326378, "clip_ratio/high_mean": 0.0006585423780052224, "clip_ratio/low_mean": 0.0006180987074912991, "clip_ratio/low_min": 6.511421997856814e-05, "clip_ratio/region_mean": 0.0012766411091433838, "epoch": 0.07027982430043925, "grad_norm": 0.12757332623004913, "learning_rate": 2e-07, "loss": 0.0249, "step": 753 }, { "clip_ratio/high_max": 0.002022508248046506, "clip_ratio/high_mean": 0.0008284197156172013, "clip_ratio/low_mean": 0.0006186634236655664, "clip_ratio/low_min": 3.2501298846909776e-05, "clip_ratio/region_mean": 0.0014470831374637783, "epoch": 0.07037315740043983, "grad_norm": 10.3920259475708, "learning_rate": 2e-07, "loss": 0.0343, "step": 754 }, { "clip_ratio/high_max": 0.0015992866137821693, "clip_ratio/high_mean": 0.0007028676827758318, "clip_ratio/low_mean": 0.0006047942206350854, "clip_ratio/low_min": 2.5946865207515657e-05, "clip_ratio/region_mean": 0.001307661907048896, "epoch": 0.07046649050044042, "grad_norm": 0.14464056491851807, "learning_rate": 2e-07, "loss": 0.0255, "step": 755 }, { "clip_ratio/high_max": 0.002156022634153487, "clip_ratio/high_mean": 0.0008191678261937341, "clip_ratio/low_mean": 0.0006043455796316266, "clip_ratio/low_min": 1.908193053168361e-05, "clip_ratio/region_mean": 0.0014235134003683925, "epoch": 0.070559823600441, "grad_norm": 0.13640077412128448, "learning_rate": 2e-07, "loss": -0.0086, "step": 756 }, { "clip_ratio/high_max": 0.0016338919376721606, "clip_ratio/high_mean": 0.0007774683890602319, "clip_ratio/low_mean": 0.0007251419847307261, "clip_ratio/low_min": 3.3545678888913244e-05, "clip_ratio/region_mean": 0.0015026103537820745, "epoch": 0.07065315670044159, "grad_norm": 0.14856377243995667, "learning_rate": 2e-07, "loss": 0.0562, "step": 757 }, { "clip_ratio/high_max": 0.0019688043394126, "clip_ratio/high_mean": 0.0008163420116034104, "clip_ratio/low_mean": 0.0005960196012892993, "clip_ratio/low_min": 1.1966302736254875e-05, "clip_ratio/region_mean": 0.0014123615910648368, "epoch": 0.07074648980044217, "grad_norm": 0.12315496802330017, "learning_rate": 2e-07, "loss": 0.0196, "step": 758 }, { "clip_ratio/high_max": 0.0016924047267821152, "clip_ratio/high_mean": 0.0006796423795094597, "clip_ratio/low_mean": 0.0006029579089954495, "clip_ratio/low_min": 1.5539212199655594e-05, "clip_ratio/region_mean": 0.0012826003003283404, "epoch": 0.07083982290044274, "grad_norm": 0.13579337298870087, "learning_rate": 2e-07, "loss": 0.0471, "step": 759 }, { "clip_ratio/high_max": 0.0017405014441465028, "clip_ratio/high_mean": 0.0007377229321718914, "clip_ratio/low_mean": 0.0007337011684285244, "clip_ratio/low_min": 1.540927041787654e-05, "clip_ratio/region_mean": 0.001471424096962437, "epoch": 0.07093315600044334, "grad_norm": 0.15686245262622833, "learning_rate": 2e-07, "loss": 0.0408, "step": 760 }, { "clip_ratio/high_max": 0.0016048759425757453, "clip_ratio/high_mean": 0.0006105501306592487, "clip_ratio/low_mean": 0.0006372321549861226, "clip_ratio/low_min": 9.380648134538205e-05, "clip_ratio/region_mean": 0.0012477823001972865, "epoch": 0.07102648910044392, "grad_norm": 0.12125812470912933, "learning_rate": 2e-07, "loss": 0.0567, "step": 761 }, { "clip_ratio/high_max": 0.001938916746439645, "clip_ratio/high_mean": 0.0007488416513297125, "clip_ratio/low_mean": 0.0006219257575139636, "clip_ratio/low_min": 2.036825753748417e-05, "clip_ratio/region_mean": 0.001370767451589927, "epoch": 0.0711198222004445, "grad_norm": 0.14363320171833038, "learning_rate": 2e-07, "loss": 0.0269, "step": 762 }, { "clip_ratio/high_max": 0.0020458885628613643, "clip_ratio/high_mean": 0.0008150991016009357, "clip_ratio/low_mean": 0.000612141808232991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014272409171098843, "epoch": 0.07121315530044509, "grad_norm": 0.15316857397556305, "learning_rate": 2e-07, "loss": -0.0045, "step": 763 }, { "clip_ratio/high_max": 0.0016988413808576297, "clip_ratio/high_mean": 0.0007218188902697875, "clip_ratio/low_mean": 0.0006334005747703486, "clip_ratio/low_min": 3.572069272195222e-05, "clip_ratio/region_mean": 0.0013552194541261997, "epoch": 0.07130648840044566, "grad_norm": 0.1329914629459381, "learning_rate": 2e-07, "loss": 0.0404, "step": 764 }, { "clip_ratio/high_max": 0.001977858726604609, "clip_ratio/high_mean": 0.0008104218886728631, "clip_ratio/low_mean": 0.0007157653344620485, "clip_ratio/low_min": 7.466999159078114e-05, "clip_ratio/region_mean": 0.0015261872104019858, "epoch": 0.07139982150044624, "grad_norm": 0.14950889348983765, "learning_rate": 2e-07, "loss": 0.0242, "step": 765 }, { "clip_ratio/high_max": 0.0019842466499540024, "clip_ratio/high_mean": 0.0008427213015238522, "clip_ratio/low_mean": 0.0006352626105581294, "clip_ratio/low_min": 8.567565873818239e-05, "clip_ratio/region_mean": 0.0014779839511902537, "epoch": 0.07149315460044683, "grad_norm": 0.13594745099544525, "learning_rate": 2e-07, "loss": 0.0006, "step": 766 }, { "clip_ratio/high_max": 0.0016343000897904858, "clip_ratio/high_mean": 0.0007064003239065642, "clip_ratio/low_mean": 0.0006763603469153168, "clip_ratio/low_min": 4.872680074186064e-05, "clip_ratio/region_mean": 0.0013827606671839021, "epoch": 0.07158648770044741, "grad_norm": 0.154115691781044, "learning_rate": 2e-07, "loss": 0.0322, "step": 767 }, { "clip_ratio/high_max": 0.0015900861526461085, "clip_ratio/high_mean": 0.0006840725200163433, "clip_ratio/low_mean": 0.0006002959125908092, "clip_ratio/low_min": 2.858479001588421e-05, "clip_ratio/region_mean": 0.0012843684744439088, "epoch": 0.071679820800448, "grad_norm": 0.14167453348636627, "learning_rate": 2e-07, "loss": -0.0003, "step": 768 }, { "clip_ratio/high_max": 0.0014781792742724065, "clip_ratio/high_mean": 0.0006099792053646524, "clip_ratio/low_mean": 0.0005983139417367056, "clip_ratio/low_min": 0.00010939509866148, "clip_ratio/region_mean": 0.001208293135277927, "completions/clipped_ratio": 0.016095842633928603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 655.1669921875, "completions/mean_terminated_length": 598.8778076171875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.07177315390044858, "grad_norm": 0.26202720403671265, "learning_rate": 2e-07, "loss": 0.0504, "num_tokens": 589071875.0, "reward": 0.5863386392593384, "reward_std": 0.18439003825187683, "rewards/simpleverify_reward/mean": 0.5863385796546936, "rewards/simpleverify_reward/std": 0.4924914240837097, "step": 769 }, { "clip_ratio/high_max": 0.001833040838391753, "clip_ratio/high_mean": 0.0006734365324518876, "clip_ratio/low_mean": 0.0005811282735521672, "clip_ratio/low_min": 3.7520137993851677e-05, "clip_ratio/region_mean": 0.0012545647950901184, "epoch": 0.07186648700044916, "grad_norm": 0.1269025355577469, "learning_rate": 2e-07, "loss": 0.0286, "step": 770 }, { "clip_ratio/high_max": 0.00182122028854792, "clip_ratio/high_mean": 0.0006383624313457403, "clip_ratio/low_mean": 0.000556662345843506, "clip_ratio/low_min": 1.9364750187378377e-05, "clip_ratio/region_mean": 0.001195024793560151, "epoch": 0.07195982010044975, "grad_norm": 0.11978539079427719, "learning_rate": 2e-07, "loss": 0.0575, "step": 771 }, { "clip_ratio/high_max": 0.001550647026306251, "clip_ratio/high_mean": 0.0006025546626915457, "clip_ratio/low_mean": 0.0006839850393589586, "clip_ratio/low_min": 6.36908062006114e-05, "clip_ratio/region_mean": 0.0012865396711276844, "epoch": 0.07205315320045033, "grad_norm": 0.1369551420211792, "learning_rate": 2e-07, "loss": 0.0525, "step": 772 }, { "clip_ratio/high_max": 0.0016724560773582198, "clip_ratio/high_mean": 0.0006427054249797948, "clip_ratio/low_mean": 0.000574533078179229, "clip_ratio/low_min": 2.587933704489842e-05, "clip_ratio/region_mean": 0.0012172385067970026, "epoch": 0.07214648630045091, "grad_norm": 0.12382771074771881, "learning_rate": 2e-07, "loss": 0.0534, "step": 773 }, { "clip_ratio/high_max": 0.0014508609019685537, "clip_ratio/high_mean": 0.0005469740954140434, "clip_ratio/low_mean": 0.000522126578289317, "clip_ratio/low_min": 1.5952016838127747e-05, "clip_ratio/region_mean": 0.0010691006846172968, "epoch": 0.0722398194004515, "grad_norm": 0.127375990152359, "learning_rate": 2e-07, "loss": 0.0347, "step": 774 }, { "clip_ratio/high_max": 0.0018920573565992527, "clip_ratio/high_mean": 0.000626171366093331, "clip_ratio/low_mean": 0.000507831189679564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011340025703248102, "epoch": 0.07233315250045208, "grad_norm": 0.14250198006629944, "learning_rate": 2e-07, "loss": 0.028, "step": 775 }, { "clip_ratio/high_max": 0.0019099104247288778, "clip_ratio/high_mean": 0.0007119679230527254, "clip_ratio/low_mean": 0.0005120594614709262, "clip_ratio/low_min": 1.3845812645740807e-05, "clip_ratio/region_mean": 0.0012240273717907257, "epoch": 0.07242648560045266, "grad_norm": 0.12332292646169662, "learning_rate": 2e-07, "loss": 0.0167, "step": 776 }, { "clip_ratio/high_max": 0.0013264350236568134, "clip_ratio/high_mean": 0.0006094816935728886, "clip_ratio/low_mean": 0.0005187598162592622, "clip_ratio/low_min": 7.307377472898224e-06, "clip_ratio/region_mean": 0.001128241518017603, "epoch": 0.07251981870045325, "grad_norm": 0.1331176906824112, "learning_rate": 2e-07, "loss": 0.0383, "step": 777 }, { "clip_ratio/high_max": 0.0019108845372102223, "clip_ratio/high_mean": 0.0007144267619878519, "clip_ratio/low_mean": 0.0005685477426595753, "clip_ratio/low_min": 4.417019135871669e-05, "clip_ratio/region_mean": 0.0012829745028284378, "epoch": 0.07261315180045383, "grad_norm": 0.12833134829998016, "learning_rate": 2e-07, "loss": 0.0012, "step": 778 }, { "clip_ratio/high_max": 0.0017454239714425057, "clip_ratio/high_mean": 0.0006432412901631324, "clip_ratio/low_mean": 0.0006263824925554218, "clip_ratio/low_min": 3.6387573345564306e-05, "clip_ratio/region_mean": 0.0012696237827185541, "epoch": 0.07270648490045442, "grad_norm": 0.12894085049629211, "learning_rate": 2e-07, "loss": 0.0122, "step": 779 }, { "clip_ratio/high_max": 0.0016816244096844457, "clip_ratio/high_mean": 0.0006803845089962124, "clip_ratio/low_mean": 0.0006154627244541189, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012958472834725399, "epoch": 0.072799818000455, "grad_norm": 0.13945753872394562, "learning_rate": 2e-07, "loss": 0.0019, "step": 780 }, { "clip_ratio/high_max": 0.00214164052522392, "clip_ratio/high_mean": 0.000725825233530486, "clip_ratio/low_mean": 0.0005934840664849617, "clip_ratio/low_min": 5.610407379208482e-05, "clip_ratio/region_mean": 0.0013193092854635324, "epoch": 0.07289315110045558, "grad_norm": 0.14117878675460815, "learning_rate": 2e-07, "loss": 0.0464, "step": 781 }, { "clip_ratio/high_max": 0.001704036974842893, "clip_ratio/high_mean": 0.0006747729139533476, "clip_ratio/low_mean": 0.0005418484035999427, "clip_ratio/low_min": 3.411008037801366e-05, "clip_ratio/region_mean": 0.0012166213418822736, "epoch": 0.07298648420045617, "grad_norm": 0.14113466441631317, "learning_rate": 2e-07, "loss": 0.0151, "step": 782 }, { "clip_ratio/high_max": 0.002116542586009018, "clip_ratio/high_mean": 0.0007058271839923691, "clip_ratio/low_mean": 0.0005910425716137979, "clip_ratio/low_min": 4.877468018094078e-05, "clip_ratio/region_mean": 0.0012968697810720187, "epoch": 0.07307981730045675, "grad_norm": 0.1459830105304718, "learning_rate": 2e-07, "loss": 0.0238, "step": 783 }, { "clip_ratio/high_max": 0.0014133827571640722, "clip_ratio/high_mean": 0.0006136717420304194, "clip_ratio/low_mean": 0.0005624677369269193, "clip_ratio/low_min": 4.925401572108967e-05, "clip_ratio/region_mean": 0.0011761394889617804, "epoch": 0.07317315040045733, "grad_norm": 0.12120173871517181, "learning_rate": 2e-07, "loss": 0.0521, "step": 784 }, { "clip_ratio/high_max": 0.0018333422958676238, "clip_ratio/high_mean": 0.0007060539956000866, "clip_ratio/low_mean": 0.0004661673738155514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011722213748726062, "epoch": 0.07326648350045792, "grad_norm": 0.12322816997766495, "learning_rate": 2e-07, "loss": 0.0133, "step": 785 }, { "clip_ratio/high_max": 0.0017948177992366254, "clip_ratio/high_mean": 0.0007219899707706645, "clip_ratio/low_mean": 0.0005412137170424103, "clip_ratio/low_min": 1.2440286809578538e-05, "clip_ratio/region_mean": 0.0012632036850845907, "epoch": 0.0733598166004585, "grad_norm": 0.13310644030570984, "learning_rate": 2e-07, "loss": 0.0018, "step": 786 }, { "clip_ratio/high_max": 0.0015695174006395973, "clip_ratio/high_mean": 0.000673003023621277, "clip_ratio/low_mean": 0.0004890238133157254, "clip_ratio/low_min": 1.121881177823525e-05, "clip_ratio/region_mean": 0.0011620268087426666, "epoch": 0.07345314970045909, "grad_norm": 0.14041544497013092, "learning_rate": 2e-07, "loss": -0.0158, "step": 787 }, { "clip_ratio/high_max": 0.001891879248432815, "clip_ratio/high_mean": 0.00075017050767201, "clip_ratio/low_mean": 0.0005772367640020093, "clip_ratio/low_min": 3.1890142963675316e-05, "clip_ratio/region_mean": 0.001327407248027157, "epoch": 0.07354648280045967, "grad_norm": 0.12373622506856918, "learning_rate": 2e-07, "loss": -0.0021, "step": 788 }, { "clip_ratio/high_max": 0.0015318896639655577, "clip_ratio/high_mean": 0.0006084103624743875, "clip_ratio/low_mean": 0.000576900224587007, "clip_ratio/low_min": 2.884675814129878e-05, "clip_ratio/region_mean": 0.0011853106116177514, "epoch": 0.07363981590046025, "grad_norm": 0.13347403705120087, "learning_rate": 2e-07, "loss": 0.0136, "step": 789 }, { "clip_ratio/high_max": 0.0014818500094406772, "clip_ratio/high_mean": 0.0006361109790304909, "clip_ratio/low_mean": 0.0005940767323409091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001230187677720096, "epoch": 0.07373314900046084, "grad_norm": 0.13543841242790222, "learning_rate": 2e-07, "loss": 0.0334, "step": 790 }, { "clip_ratio/high_max": 0.0016123496534419246, "clip_ratio/high_mean": 0.0006192747023305856, "clip_ratio/low_mean": 0.0005097080393170472, "clip_ratio/low_min": 1.60503332153894e-05, "clip_ratio/region_mean": 0.0011289827234577388, "epoch": 0.07382648210046142, "grad_norm": 0.13138028979301453, "learning_rate": 2e-07, "loss": 0.0016, "step": 791 }, { "clip_ratio/high_max": 0.0013765665898972657, "clip_ratio/high_mean": 0.0005335067362466361, "clip_ratio/low_mean": 0.0005916447280469583, "clip_ratio/low_min": 1.4912908227415755e-05, "clip_ratio/region_mean": 0.0011251514588366263, "epoch": 0.073919815200462, "grad_norm": 0.12243971973657608, "learning_rate": 2e-07, "loss": 0.0593, "step": 792 }, { "clip_ratio/high_max": 0.0020504188869381323, "clip_ratio/high_mean": 0.0007492648092011223, "clip_ratio/low_mean": 0.000585534574383928, "clip_ratio/low_min": 1.0848811143659987e-05, "clip_ratio/region_mean": 0.0013347993699426297, "epoch": 0.07401314830046259, "grad_norm": 0.13791204988956451, "learning_rate": 2e-07, "loss": 0.0106, "step": 793 }, { "clip_ratio/high_max": 0.001704961214272771, "clip_ratio/high_mean": 0.000613057600276079, "clip_ratio/low_mean": 0.0005092905284982407, "clip_ratio/low_min": 3.165330281262868e-05, "clip_ratio/region_mean": 0.0011223481196793728, "epoch": 0.07410648140046316, "grad_norm": 0.1350066214799881, "learning_rate": 2e-07, "loss": 0.0459, "step": 794 }, { "clip_ratio/high_max": 0.001634477433981374, "clip_ratio/high_mean": 0.0006938577207620256, "clip_ratio/low_mean": 0.0004946283625031356, "clip_ratio/low_min": 3.927860234398395e-05, "clip_ratio/region_mean": 0.0011884860905411188, "epoch": 0.07419981450046374, "grad_norm": 0.13871221244335175, "learning_rate": 2e-07, "loss": 0.0161, "step": 795 }, { "clip_ratio/high_max": 0.0015516856583417393, "clip_ratio/high_mean": 0.0005660045890181209, "clip_ratio/low_mean": 0.0006167932133394061, "clip_ratio/low_min": 3.514307172736153e-05, "clip_ratio/region_mean": 0.0011827978414657991, "epoch": 0.07429314760046433, "grad_norm": 0.14665979146957397, "learning_rate": 2e-07, "loss": 0.0708, "step": 796 }, { "clip_ratio/high_max": 0.0017520424480608199, "clip_ratio/high_mean": 0.0006769001111024409, "clip_ratio/low_mean": 0.0005800953513244167, "clip_ratio/low_min": 2.6594133487378713e-05, "clip_ratio/region_mean": 0.001256995466974331, "epoch": 0.07438648070046491, "grad_norm": 0.13437144458293915, "learning_rate": 2e-07, "loss": 0.0665, "step": 797 }, { "clip_ratio/high_max": 0.001794855787011329, "clip_ratio/high_mean": 0.0007487581333407434, "clip_ratio/low_mean": 0.0005620388692477718, "clip_ratio/low_min": 3.1401965316035785e-05, "clip_ratio/region_mean": 0.0013107969825796317, "epoch": 0.0744798138004655, "grad_norm": 0.13425087928771973, "learning_rate": 2e-07, "loss": -0.0077, "step": 798 }, { "clip_ratio/high_max": 0.001946188356669154, "clip_ratio/high_mean": 0.0007347673763433704, "clip_ratio/low_mean": 0.0005433606193037122, "clip_ratio/low_min": 4.901195143247605e-05, "clip_ratio/region_mean": 0.0012781279838236514, "epoch": 0.07457314690046608, "grad_norm": 0.13724960386753082, "learning_rate": 2e-07, "loss": 0.0125, "step": 799 }, { "clip_ratio/high_max": 0.0015965724996931385, "clip_ratio/high_mean": 0.0006302674692051369, "clip_ratio/low_mean": 0.0005463216002681293, "clip_ratio/low_min": 5.143058479006868e-05, "clip_ratio/region_mean": 0.001176589066744782, "epoch": 0.07466648000046666, "grad_norm": 0.11962824314832687, "learning_rate": 2e-07, "loss": 0.0504, "step": 800 }, { "clip_ratio/high_max": 0.0016645022114971653, "clip_ratio/high_mean": 0.0007076521997078089, "clip_ratio/low_mean": 0.0005040593468947918, "clip_ratio/low_min": 8.934953257266898e-06, "clip_ratio/region_mean": 0.0012117115511500742, "epoch": 0.07475981310046725, "grad_norm": 0.1560024917125702, "learning_rate": 2e-07, "loss": 0.0008, "step": 801 }, { "clip_ratio/high_max": 0.0018497305864002556, "clip_ratio/high_mean": 0.0007217662023322191, "clip_ratio/low_mean": 0.0005779963767054141, "clip_ratio/low_min": 2.8836620913352817e-05, "clip_ratio/region_mean": 0.001299762570852181, "epoch": 0.07485314620046783, "grad_norm": 0.1296125203371048, "learning_rate": 2e-07, "loss": 0.0353, "step": 802 }, { "clip_ratio/high_max": 0.001809755529393442, "clip_ratio/high_mean": 0.000774874904891476, "clip_ratio/low_mean": 0.00046711757022421807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012419924642017577, "epoch": 0.07494647930046841, "grad_norm": 0.132086381316185, "learning_rate": 2e-07, "loss": -0.0093, "step": 803 }, { "clip_ratio/high_max": 0.0016314685854013078, "clip_ratio/high_mean": 0.0005580781798926182, "clip_ratio/low_mean": 0.0005056617710579303, "clip_ratio/low_min": 1.0420139915368054e-05, "clip_ratio/region_mean": 0.001063739946403075, "epoch": 0.075039812400469, "grad_norm": 0.11413397639989853, "learning_rate": 2e-07, "loss": 0.0254, "step": 804 }, { "clip_ratio/high_max": 0.0018310889063286595, "clip_ratio/high_mean": 0.000756471248678281, "clip_ratio/low_mean": 0.0005501477389771026, "clip_ratio/low_min": 1.0255989764118567e-05, "clip_ratio/region_mean": 0.0013066189567325637, "epoch": 0.07513314550046958, "grad_norm": 0.20393221080303192, "learning_rate": 2e-07, "loss": 0.0235, "step": 805 }, { "clip_ratio/high_max": 0.0017719959178066347, "clip_ratio/high_mean": 0.0007169086129579227, "clip_ratio/low_mean": 0.0005473223227454582, "clip_ratio/low_min": 7.12463261152152e-05, "clip_ratio/region_mean": 0.0012642309302464128, "epoch": 0.07522647860047016, "grad_norm": 0.1374519020318985, "learning_rate": 2e-07, "loss": 0.0248, "step": 806 }, { "clip_ratio/high_max": 0.0019289104820927605, "clip_ratio/high_mean": 0.0008027928815863561, "clip_ratio/low_mean": 0.0006491938584076706, "clip_ratio/low_min": 4.7994041779020336e-05, "clip_ratio/region_mean": 0.0014519867581839208, "epoch": 0.07531981170047075, "grad_norm": 0.14597781002521515, "learning_rate": 2e-07, "loss": 0.018, "step": 807 }, { "clip_ratio/high_max": 0.0015129840139707085, "clip_ratio/high_mean": 0.000685712479025824, "clip_ratio/low_mean": 0.0005088247417006642, "clip_ratio/low_min": 1.3300702448759694e-05, "clip_ratio/region_mean": 0.0011945371952606365, "epoch": 0.07541314480047133, "grad_norm": 0.14136047661304474, "learning_rate": 2e-07, "loss": -0.0001, "step": 808 }, { "clip_ratio/high_max": 0.0018651897044037469, "clip_ratio/high_mean": 0.0007652069580217358, "clip_ratio/low_mean": 0.0005746390761487419, "clip_ratio/low_min": 2.3448023966921028e-05, "clip_ratio/region_mean": 0.0013398460396274459, "epoch": 0.07550647790047192, "grad_norm": 0.147970050573349, "learning_rate": 2e-07, "loss": 0.0163, "step": 809 }, { "clip_ratio/high_max": 0.001958991510036867, "clip_ratio/high_mean": 0.0007688003424846102, "clip_ratio/low_mean": 0.0005585069393418962, "clip_ratio/low_min": 4.189849005342694e-05, "clip_ratio/region_mean": 0.0013273073091113474, "epoch": 0.0755998110004725, "grad_norm": 0.1367771178483963, "learning_rate": 2e-07, "loss": 0.0197, "step": 810 }, { "clip_ratio/high_max": 0.0018569914245745167, "clip_ratio/high_mean": 0.0007290806024684571, "clip_ratio/low_mean": 0.0006079063759898418, "clip_ratio/low_min": 2.486642233634484e-05, "clip_ratio/region_mean": 0.0013369870102906134, "epoch": 0.07569314410047308, "grad_norm": 0.14639969170093536, "learning_rate": 2e-07, "loss": 0.0253, "step": 811 }, { "clip_ratio/high_max": 0.0017062740498658968, "clip_ratio/high_mean": 0.0007819854372428381, "clip_ratio/low_mean": 0.0005987690437905258, "clip_ratio/low_min": 5.9143076214240864e-05, "clip_ratio/region_mean": 0.001380754492856795, "epoch": 0.07578647720047367, "grad_norm": 0.14234429597854614, "learning_rate": 2e-07, "loss": 0.0389, "step": 812 }, { "clip_ratio/high_max": 0.0013804529189656023, "clip_ratio/high_mean": 0.0005912401793466415, "clip_ratio/low_mean": 0.0005607520051853498, "clip_ratio/low_min": 4.339407678344287e-05, "clip_ratio/region_mean": 0.001151992164523108, "epoch": 0.07587981030047425, "grad_norm": 0.13619458675384521, "learning_rate": 2e-07, "loss": 0.0306, "step": 813 }, { "clip_ratio/high_max": 0.0016600908857071772, "clip_ratio/high_mean": 0.0005965907712379703, "clip_ratio/low_mean": 0.0006012896628817543, "clip_ratio/low_min": 3.727036346390378e-05, "clip_ratio/region_mean": 0.0011978804213867988, "epoch": 0.07597314340047483, "grad_norm": 0.16401319205760956, "learning_rate": 2e-07, "loss": 0.0725, "step": 814 }, { "clip_ratio/high_max": 0.001683846869127592, "clip_ratio/high_mean": 0.0006842536131443921, "clip_ratio/low_mean": 0.0006626198091908009, "clip_ratio/low_min": 6.617076633119723e-05, "clip_ratio/region_mean": 0.0013468734323396347, "epoch": 0.07606647650047542, "grad_norm": 0.13338203728199005, "learning_rate": 2e-07, "loss": 0.0045, "step": 815 }, { "clip_ratio/high_max": 0.0018879020935855806, "clip_ratio/high_mean": 0.0008024603939702502, "clip_ratio/low_mean": 0.000514691010266688, "clip_ratio/low_min": 1.3703135664400179e-05, "clip_ratio/region_mean": 0.0013171513855922967, "epoch": 0.076159809600476, "grad_norm": 0.14281673729419708, "learning_rate": 2e-07, "loss": -0.0192, "step": 816 }, { "clip_ratio/high_max": 0.0017880633677123114, "clip_ratio/high_mean": 0.0007443195099767763, "clip_ratio/low_mean": 0.0005984746940157493, "clip_ratio/low_min": 8.708844870852772e-05, "clip_ratio/region_mean": 0.0013427941958070733, "epoch": 0.07625314270047658, "grad_norm": 0.1438835710287094, "learning_rate": 2e-07, "loss": -0.0193, "step": 817 }, { "clip_ratio/high_max": 0.0019252267738920636, "clip_ratio/high_mean": 0.000655231240671128, "clip_ratio/low_mean": 0.0006179340252856491, "clip_ratio/low_min": 3.0273673473857343e-05, "clip_ratio/region_mean": 0.0012731652641377877, "epoch": 0.07634647580047717, "grad_norm": 0.12540386617183685, "learning_rate": 2e-07, "loss": 0.0657, "step": 818 }, { "clip_ratio/high_max": 0.0015632547838322353, "clip_ratio/high_mean": 0.0006705824234813917, "clip_ratio/low_mean": 0.0005352399584808154, "clip_ratio/low_min": 5.693432467523962e-05, "clip_ratio/region_mean": 0.001205822394695133, "epoch": 0.07643980890047775, "grad_norm": 0.13703253865242004, "learning_rate": 2e-07, "loss": 0.0456, "step": 819 }, { "clip_ratio/high_max": 0.00192898230307037, "clip_ratio/high_mean": 0.0007037546911305981, "clip_ratio/low_mean": 0.0005784247268820764, "clip_ratio/low_min": 4.272923251846805e-05, "clip_ratio/region_mean": 0.0012821794080082327, "epoch": 0.07653314200047834, "grad_norm": 0.11834394931793213, "learning_rate": 2e-07, "loss": 0.0421, "step": 820 }, { "clip_ratio/high_max": 0.0015736531204311177, "clip_ratio/high_mean": 0.0006688269968435634, "clip_ratio/low_mean": 0.000613948071077175, "clip_ratio/low_min": 8.350497682840796e-05, "clip_ratio/region_mean": 0.0012827750470023602, "epoch": 0.07662647510047892, "grad_norm": 0.13649001717567444, "learning_rate": 2e-07, "loss": 0.0446, "step": 821 }, { "clip_ratio/high_max": 0.0016398711704823654, "clip_ratio/high_mean": 0.0006595290196855785, "clip_ratio/low_mean": 0.0005351198815333191, "clip_ratio/low_min": 4.1091743696597405e-05, "clip_ratio/region_mean": 0.0011946489175898023, "epoch": 0.0767198082004795, "grad_norm": 0.12645286321640015, "learning_rate": 2e-07, "loss": 0.0187, "step": 822 }, { "clip_ratio/high_max": 0.0016935367311816663, "clip_ratio/high_mean": 0.0006942493946553441, "clip_ratio/low_mean": 0.0005594274443865288, "clip_ratio/low_min": 1.0724090316216461e-05, "clip_ratio/region_mean": 0.001253676844498841, "epoch": 0.07681314130048009, "grad_norm": 0.12123214453458786, "learning_rate": 2e-07, "loss": 0.0188, "step": 823 }, { "clip_ratio/high_max": 0.0015939523400447797, "clip_ratio/high_mean": 0.0006969505539018428, "clip_ratio/low_mean": 0.0005984740364510799, "clip_ratio/low_min": 1.2327415788604412e-05, "clip_ratio/region_mean": 0.0012954245758010074, "epoch": 0.07690647440048066, "grad_norm": 0.12751084566116333, "learning_rate": 2e-07, "loss": 0.0542, "step": 824 }, { "clip_ratio/high_max": 0.0018998469022335485, "clip_ratio/high_mean": 0.000678361635436886, "clip_ratio/low_mean": 0.0004997128567083564, "clip_ratio/low_min": 1.1328620530548505e-05, "clip_ratio/region_mean": 0.0011780744971474633, "epoch": 0.07699980750048124, "grad_norm": 0.13260550796985626, "learning_rate": 2e-07, "loss": 0.0053, "step": 825 }, { "clip_ratio/high_max": 0.002143698133295402, "clip_ratio/high_mean": 0.0008798874350759434, "clip_ratio/low_mean": 0.0006661234074272215, "clip_ratio/low_min": 7.995417763595469e-05, "clip_ratio/region_mean": 0.001546010793390451, "epoch": 0.07709314060048184, "grad_norm": 0.15077175199985504, "learning_rate": 2e-07, "loss": 0.0419, "step": 826 }, { "clip_ratio/high_max": 0.0018457735095580574, "clip_ratio/high_mean": 0.0007108824775059475, "clip_ratio/low_mean": 0.000641125325273606, "clip_ratio/low_min": 2.2254446776059922e-05, "clip_ratio/region_mean": 0.0013520078027795535, "epoch": 0.07718647370048241, "grad_norm": 0.12772871553897858, "learning_rate": 2e-07, "loss": 0.0187, "step": 827 }, { "clip_ratio/high_max": 0.0016927984052017564, "clip_ratio/high_mean": 0.0005870232362212846, "clip_ratio/low_mean": 0.0005677641602233052, "clip_ratio/low_min": 8.983358929981478e-05, "clip_ratio/region_mean": 0.0011547874091775157, "epoch": 0.07727980680048299, "grad_norm": 0.12485478818416595, "learning_rate": 2e-07, "loss": 0.0766, "step": 828 }, { "clip_ratio/high_max": 0.001904539625684265, "clip_ratio/high_mean": 0.0007107686105882749, "clip_ratio/low_mean": 0.0005826346487083356, "clip_ratio/low_min": 5.827071981912013e-05, "clip_ratio/region_mean": 0.0012934032674820628, "epoch": 0.07737313990048358, "grad_norm": 0.13496677577495575, "learning_rate": 2e-07, "loss": 0.0224, "step": 829 }, { "clip_ratio/high_max": 0.0015324577943829354, "clip_ratio/high_mean": 0.0006205835179571295, "clip_ratio/low_mean": 0.0006622182881983463, "clip_ratio/low_min": 4.898440874967491e-05, "clip_ratio/region_mean": 0.0012828018097934546, "epoch": 0.07746647300048416, "grad_norm": 0.1365133672952652, "learning_rate": 2e-07, "loss": 0.0514, "step": 830 }, { "clip_ratio/high_max": 0.001999100459215697, "clip_ratio/high_mean": 0.0007701770973653765, "clip_ratio/low_mean": 0.0005273240471979079, "clip_ratio/low_min": 5.404704097600188e-06, "clip_ratio/region_mean": 0.0012975011559319682, "epoch": 0.07755980610048475, "grad_norm": 0.12909482419490814, "learning_rate": 2e-07, "loss": -0.0458, "step": 831 }, { "clip_ratio/high_max": 0.001708604901068611, "clip_ratio/high_mean": 0.0007225973040476674, "clip_ratio/low_mean": 0.0006624727629969129, "clip_ratio/low_min": 8.105641882139025e-05, "clip_ratio/region_mean": 0.0013850700815964956, "epoch": 0.07765313920048533, "grad_norm": 0.15218068659305573, "learning_rate": 2e-07, "loss": 0.017, "step": 832 }, { "clip_ratio/high_max": 0.0018833624417311512, "clip_ratio/high_mean": 0.0007042548495519441, "clip_ratio/low_mean": 0.0006081519522922463, "clip_ratio/low_min": 3.810309226537356e-05, "clip_ratio/region_mean": 0.0013124067809258122, "epoch": 0.07774647230048591, "grad_norm": 0.14196233451366425, "learning_rate": 2e-07, "loss": 0.0507, "step": 833 }, { "clip_ratio/high_max": 0.002149450345314108, "clip_ratio/high_mean": 0.0007868572229199344, "clip_ratio/low_mean": 0.0005962424675090006, "clip_ratio/low_min": 2.8472801204770803e-05, "clip_ratio/region_mean": 0.0013830996977048926, "epoch": 0.0778398054004865, "grad_norm": 0.13571864366531372, "learning_rate": 2e-07, "loss": -0.0178, "step": 834 }, { "clip_ratio/high_max": 0.0019375562405912206, "clip_ratio/high_mean": 0.0007950454019010067, "clip_ratio/low_mean": 0.0006569811812369153, "clip_ratio/low_min": 4.515250566328177e-05, "clip_ratio/region_mean": 0.001452026583137922, "epoch": 0.07793313850048708, "grad_norm": 0.13022808730602264, "learning_rate": 2e-07, "loss": -0.0233, "step": 835 }, { "clip_ratio/high_max": 0.0018788620400300715, "clip_ratio/high_mean": 0.000750087898268248, "clip_ratio/low_mean": 0.000557503704840201, "clip_ratio/low_min": 2.6566921405901667e-05, "clip_ratio/region_mean": 0.0013075916431262158, "epoch": 0.07802647160048766, "grad_norm": 0.14658649265766144, "learning_rate": 2e-07, "loss": 0.0006, "step": 836 }, { "clip_ratio/high_max": 0.0019153979192196857, "clip_ratio/high_mean": 0.0008173178539436776, "clip_ratio/low_mean": 0.0005796231771455496, "clip_ratio/low_min": 5.3002538152213674e-05, "clip_ratio/region_mean": 0.0013969410356367007, "epoch": 0.07811980470048825, "grad_norm": 0.15041302144527435, "learning_rate": 2e-07, "loss": 0.0165, "step": 837 }, { "clip_ratio/high_max": 0.0014782971047679894, "clip_ratio/high_mean": 0.0007079317510942928, "clip_ratio/low_mean": 0.0005419213994173333, "clip_ratio/low_min": 1.3417776244750712e-05, "clip_ratio/region_mean": 0.0012498531505116262, "epoch": 0.07821313780048883, "grad_norm": 0.13171188533306122, "learning_rate": 2e-07, "loss": 0.0184, "step": 838 }, { "clip_ratio/high_max": 0.0018026324905804358, "clip_ratio/high_mean": 0.0007265883568834397, "clip_ratio/low_mean": 0.0006411815256797126, "clip_ratio/low_min": 3.8851818317198195e-05, "clip_ratio/region_mean": 0.0013677699134859722, "epoch": 0.07830647090048942, "grad_norm": 0.14654722809791565, "learning_rate": 2e-07, "loss": 0.0288, "step": 839 }, { "clip_ratio/high_max": 0.0016917094653763343, "clip_ratio/high_mean": 0.0006608433304791106, "clip_ratio/low_mean": 0.0006033857534930576, "clip_ratio/low_min": 4.684438317781314e-05, "clip_ratio/region_mean": 0.0012642290748772211, "epoch": 0.07839980400049, "grad_norm": 0.3462652266025543, "learning_rate": 2e-07, "loss": 0.0416, "step": 840 }, { "clip_ratio/high_max": 0.0015994928035070188, "clip_ratio/high_mean": 0.0006402182398232981, "clip_ratio/low_mean": 0.0006774969060643343, "clip_ratio/low_min": 3.367299677847768e-05, "clip_ratio/region_mean": 0.001317715155892074, "epoch": 0.07849313710049058, "grad_norm": 0.14348195493221283, "learning_rate": 2e-07, "loss": 0.0296, "step": 841 }, { "clip_ratio/high_max": 0.0019061489292653278, "clip_ratio/high_mean": 0.0006882521347506554, "clip_ratio/low_mean": 0.0005911034722885233, "clip_ratio/low_min": 2.8200566703162622e-05, "clip_ratio/region_mean": 0.0012793556379619986, "epoch": 0.07858647020049117, "grad_norm": 0.11689718067646027, "learning_rate": 2e-07, "loss": 0.042, "step": 842 }, { "clip_ratio/high_max": 0.0018843109064619057, "clip_ratio/high_mean": 0.0006780731910112081, "clip_ratio/low_mean": 0.0005604768794000847, "clip_ratio/low_min": 1.3935339666204527e-05, "clip_ratio/region_mean": 0.001238550103153102, "epoch": 0.07867980330049175, "grad_norm": 0.16318903863430023, "learning_rate": 2e-07, "loss": 0.0321, "step": 843 }, { "clip_ratio/high_max": 0.0017581611864443403, "clip_ratio/high_mean": 0.0006828794575994834, "clip_ratio/low_mean": 0.0005826273081765976, "clip_ratio/low_min": 1.7171543277072487e-05, "clip_ratio/region_mean": 0.0012655067694140598, "epoch": 0.07877313640049233, "grad_norm": 0.13932915031909943, "learning_rate": 2e-07, "loss": 0.015, "step": 844 }, { "clip_ratio/high_max": 0.0019657473458210006, "clip_ratio/high_mean": 0.0007430771947838366, "clip_ratio/low_mean": 0.0005935587523708818, "clip_ratio/low_min": 2.2613558030570857e-05, "clip_ratio/region_mean": 0.0013366359671636019, "epoch": 0.07886646950049292, "grad_norm": 0.12274309992790222, "learning_rate": 2e-07, "loss": 0.0315, "step": 845 }, { "clip_ratio/high_max": 0.0019313524098834023, "clip_ratio/high_mean": 0.0007779531060805311, "clip_ratio/low_mean": 0.0005629645743283618, "clip_ratio/low_min": 4.183203145657899e-05, "clip_ratio/region_mean": 0.0013409176644927356, "epoch": 0.0789598026004935, "grad_norm": 0.13740064203739166, "learning_rate": 2e-07, "loss": 0.0123, "step": 846 }, { "clip_ratio/high_max": 0.0020123430440435186, "clip_ratio/high_mean": 0.0007768597861286253, "clip_ratio/low_mean": 0.0006079615832277341, "clip_ratio/low_min": 1.5139281003939686e-05, "clip_ratio/region_mean": 0.0013848213602614123, "epoch": 0.07905313570049408, "grad_norm": 0.15318091213703156, "learning_rate": 2e-07, "loss": 0.0285, "step": 847 }, { "clip_ratio/high_max": 0.001977508349227719, "clip_ratio/high_mean": 0.000745036208172678, "clip_ratio/low_mean": 0.0006471419737863471, "clip_ratio/low_min": 5.3680588280258235e-05, "clip_ratio/region_mean": 0.0013921782119723503, "epoch": 0.07914646880049467, "grad_norm": 0.15552359819412231, "learning_rate": 2e-07, "loss": 0.0388, "step": 848 }, { "clip_ratio/high_max": 0.0019778237692662515, "clip_ratio/high_mean": 0.000709802889105049, "clip_ratio/low_mean": 0.0005543161478271941, "clip_ratio/low_min": 5.338838582247263e-05, "clip_ratio/region_mean": 0.0012641190514841583, "epoch": 0.07923980190049525, "grad_norm": 0.13178296387195587, "learning_rate": 2e-07, "loss": -0.0052, "step": 849 }, { "clip_ratio/high_max": 0.002191538871556986, "clip_ratio/high_mean": 0.0008601342469773954, "clip_ratio/low_mean": 0.000563039459848369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014231736640795134, "epoch": 0.07933313500049584, "grad_norm": 0.14099419116973877, "learning_rate": 2e-07, "loss": -0.0091, "step": 850 }, { "clip_ratio/high_max": 0.0019141512239002623, "clip_ratio/high_mean": 0.0008350371917913435, "clip_ratio/low_mean": 0.0006411176746041747, "clip_ratio/low_min": 3.245910193072632e-05, "clip_ratio/region_mean": 0.0014761548554815818, "epoch": 0.07942646810049642, "grad_norm": 0.13953916728496552, "learning_rate": 2e-07, "loss": -0.0036, "step": 851 }, { "clip_ratio/high_max": 0.001725200087093981, "clip_ratio/high_mean": 0.0006680435326416045, "clip_ratio/low_mean": 0.0006771515527361771, "clip_ratio/low_min": 7.713025706834742e-05, "clip_ratio/region_mean": 0.0013451951017486863, "epoch": 0.079519801200497, "grad_norm": 0.1532098650932312, "learning_rate": 2e-07, "loss": 0.0464, "step": 852 }, { "clip_ratio/high_max": 0.00156048374628881, "clip_ratio/high_mean": 0.0005825934267704724, "clip_ratio/low_mean": 0.0006045477475709049, "clip_ratio/low_min": 2.661591952346498e-05, "clip_ratio/region_mean": 0.0011871411534229992, "epoch": 0.07961313430049759, "grad_norm": 0.1271926909685135, "learning_rate": 2e-07, "loss": 0.0462, "step": 853 }, { "clip_ratio/high_max": 0.00171384729037527, "clip_ratio/high_mean": 0.0007462814282916952, "clip_ratio/low_mean": 0.0006080436096453923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013543250461225398, "epoch": 0.07970646740049817, "grad_norm": 0.1508963257074356, "learning_rate": 2e-07, "loss": 0.0279, "step": 854 }, { "clip_ratio/high_max": 0.001662860951910261, "clip_ratio/high_mean": 0.000676789535646094, "clip_ratio/low_mean": 0.0006402832259482238, "clip_ratio/low_min": 3.708753501996398e-05, "clip_ratio/region_mean": 0.001317072757956339, "epoch": 0.07979980050049874, "grad_norm": 0.14977407455444336, "learning_rate": 2e-07, "loss": 0.0088, "step": 855 }, { "clip_ratio/high_max": 0.0018918046516773757, "clip_ratio/high_mean": 0.0006645467674388783, "clip_ratio/low_mean": 0.0005667572959282552, "clip_ratio/low_min": 2.745173424045788e-05, "clip_ratio/region_mean": 0.001231304067914607, "epoch": 0.07989313360049934, "grad_norm": 0.14309249818325043, "learning_rate": 2e-07, "loss": 0.0489, "step": 856 }, { "clip_ratio/high_max": 0.0022195544152054936, "clip_ratio/high_mean": 0.0007853569968574448, "clip_ratio/low_mean": 0.0005403354780355585, "clip_ratio/low_min": 1.2442763363651466e-05, "clip_ratio/region_mean": 0.0013256925049063284, "epoch": 0.07998646670049991, "grad_norm": 0.1526111364364624, "learning_rate": 2e-07, "loss": 0.0345, "step": 857 }, { "clip_ratio/high_max": 0.0017888219554151874, "clip_ratio/high_mean": 0.0006484762934633181, "clip_ratio/low_mean": 0.0006968006819079164, "clip_ratio/low_min": 5.11996208842902e-05, "clip_ratio/region_mean": 0.0013452769671857823, "epoch": 0.08007979980050049, "grad_norm": 0.15145811438560486, "learning_rate": 2e-07, "loss": 0.0494, "step": 858 }, { "clip_ratio/high_max": 0.002114544444339117, "clip_ratio/high_mean": 0.0008518222293787403, "clip_ratio/low_mean": 0.0005744349846281693, "clip_ratio/low_min": 5.09886049258057e-05, "clip_ratio/region_mean": 0.0014262572549341712, "epoch": 0.08017313290050108, "grad_norm": 0.15437942743301392, "learning_rate": 2e-07, "loss": -0.0257, "step": 859 }, { "clip_ratio/high_max": 0.0017277696606470272, "clip_ratio/high_mean": 0.0007308081858354853, "clip_ratio/low_mean": 0.0006812798474129522, "clip_ratio/low_min": 5.3489121455641e-05, "clip_ratio/region_mean": 0.0014120880405243952, "epoch": 0.08026646600050166, "grad_norm": 0.1509794443845749, "learning_rate": 2e-07, "loss": 0.0339, "step": 860 }, { "clip_ratio/high_max": 0.0016461503655591514, "clip_ratio/high_mean": 0.0006396495082299225, "clip_ratio/low_mean": 0.0006666440040135058, "clip_ratio/low_min": 4.958574572810903e-05, "clip_ratio/region_mean": 0.0013062935031484812, "epoch": 0.08035979910050225, "grad_norm": 0.1817036122083664, "learning_rate": 2e-07, "loss": 0.0629, "step": 861 }, { "clip_ratio/high_max": 0.0018003068435064051, "clip_ratio/high_mean": 0.0006791081668779952, "clip_ratio/low_mean": 0.0007458238414983498, "clip_ratio/low_min": 1.593168417457491e-05, "clip_ratio/region_mean": 0.0014249320047383662, "epoch": 0.08045313220050283, "grad_norm": 0.13310229778289795, "learning_rate": 2e-07, "loss": 0.0466, "step": 862 }, { "clip_ratio/high_max": 0.0019318575650686398, "clip_ratio/high_mean": 0.0008484066456730943, "clip_ratio/low_mean": 0.0007339136136579327, "clip_ratio/low_min": 7.082555111992406e-05, "clip_ratio/region_mean": 0.0015823202884348575, "epoch": 0.08054646530050341, "grad_norm": 0.1393742859363556, "learning_rate": 2e-07, "loss": 0.039, "step": 863 }, { "clip_ratio/high_max": 0.0019195258173567709, "clip_ratio/high_mean": 0.0007930360061436659, "clip_ratio/low_mean": 0.0006757417713743052, "clip_ratio/low_min": 7.871581146901008e-05, "clip_ratio/region_mean": 0.001468777802074328, "epoch": 0.080639798400504, "grad_norm": 0.15871849656105042, "learning_rate": 2e-07, "loss": 0.0162, "step": 864 }, { "clip_ratio/high_max": 0.0018783809391607065, "clip_ratio/high_mean": 0.0007554160856670933, "clip_ratio/low_mean": 0.0006238715595827671, "clip_ratio/low_min": 1.7748117898008786e-05, "clip_ratio/region_mean": 0.001379287670715712, "epoch": 0.08073313150050458, "grad_norm": 0.23202764987945557, "learning_rate": 2e-07, "loss": 0.0565, "step": 865 }, { "clip_ratio/high_max": 0.001651904014579486, "clip_ratio/high_mean": 0.0006512316067528445, "clip_ratio/low_mean": 0.0005772025942860637, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001228434204676887, "epoch": 0.08082646460050516, "grad_norm": 0.16133476793766022, "learning_rate": 2e-07, "loss": 0.025, "step": 866 }, { "clip_ratio/high_max": 0.001793093641026644, "clip_ratio/high_mean": 0.0007651549894944765, "clip_ratio/low_mean": 0.0005958176006970461, "clip_ratio/low_min": 5.548317130887881e-05, "clip_ratio/region_mean": 0.0013609725938295014, "epoch": 0.08091979770050575, "grad_norm": 0.14940930902957916, "learning_rate": 2e-07, "loss": 0.0234, "step": 867 }, { "clip_ratio/high_max": 0.0018528346117818728, "clip_ratio/high_mean": 0.0007613141278852709, "clip_ratio/low_mean": 0.0006358465052471729, "clip_ratio/low_min": 2.1828223907505162e-05, "clip_ratio/region_mean": 0.0013971606604172848, "epoch": 0.08101313080050633, "grad_norm": 0.14557217061519623, "learning_rate": 2e-07, "loss": 0.0407, "step": 868 }, { "clip_ratio/high_max": 0.0014650764242105652, "clip_ratio/high_mean": 0.0006130330029918696, "clip_ratio/low_mean": 0.0006353680910251569, "clip_ratio/low_min": 2.386406958976295e-05, "clip_ratio/region_mean": 0.0012484010694606695, "epoch": 0.08110646390050691, "grad_norm": 0.1532893180847168, "learning_rate": 2e-07, "loss": 0.0673, "step": 869 }, { "clip_ratio/high_max": 0.0020706517170765437, "clip_ratio/high_mean": 0.000806891957836342, "clip_ratio/low_mean": 0.0006271975435083732, "clip_ratio/low_min": 5.02255834362586e-05, "clip_ratio/region_mean": 0.001434089477697853, "epoch": 0.0811997970005075, "grad_norm": 0.14128327369689941, "learning_rate": 2e-07, "loss": 0.0055, "step": 870 }, { "clip_ratio/high_max": 0.00215466323425062, "clip_ratio/high_mean": 0.0007797104026394663, "clip_ratio/low_mean": 0.0005364497510527144, "clip_ratio/low_min": 3.28055130012217e-05, "clip_ratio/region_mean": 0.0013161601491447072, "epoch": 0.08129313010050808, "grad_norm": 0.13566707074642181, "learning_rate": 2e-07, "loss": -0.0016, "step": 871 }, { "clip_ratio/high_max": 0.0018439627201587427, "clip_ratio/high_mean": 0.0007913589215604588, "clip_ratio/low_mean": 0.0006693749437545193, "clip_ratio/low_min": 2.8048916647094302e-05, "clip_ratio/region_mean": 0.001460733823478222, "epoch": 0.08138646320050867, "grad_norm": 0.13910485804080963, "learning_rate": 2e-07, "loss": 0.015, "step": 872 }, { "clip_ratio/high_max": 0.0018143208872061223, "clip_ratio/high_mean": 0.0007449587137671188, "clip_ratio/low_mean": 0.0005542302405956434, "clip_ratio/low_min": 1.4334862498799339e-05, "clip_ratio/region_mean": 0.0012991889379918575, "epoch": 0.08147979630050925, "grad_norm": 0.15003639459609985, "learning_rate": 2e-07, "loss": 0.0362, "step": 873 }, { "clip_ratio/high_max": 0.0019059238329646178, "clip_ratio/high_mean": 0.0007537580986536341, "clip_ratio/low_mean": 0.0006323008783510886, "clip_ratio/low_min": 2.662209499249002e-05, "clip_ratio/region_mean": 0.0013860589497198816, "epoch": 0.08157312940050983, "grad_norm": 0.14419548213481903, "learning_rate": 2e-07, "loss": 0.0322, "step": 874 }, { "clip_ratio/high_max": 0.002204798685852438, "clip_ratio/high_mean": 0.0008017190884856973, "clip_ratio/low_mean": 0.0007860062178224325, "clip_ratio/low_min": 8.801034073258052e-05, "clip_ratio/region_mean": 0.0015877252990321722, "epoch": 0.08166646250051042, "grad_norm": 0.17921356856822968, "learning_rate": 2e-07, "loss": 0.1084, "step": 875 }, { "clip_ratio/high_max": 0.0019459210961940698, "clip_ratio/high_mean": 0.0007523648646383663, "clip_ratio/low_mean": 0.0007814995824446669, "clip_ratio/low_min": 4.3082956835860386e-05, "clip_ratio/region_mean": 0.0015338644116127398, "epoch": 0.081759795600511, "grad_norm": 0.15163950622081757, "learning_rate": 2e-07, "loss": 0.0799, "step": 876 }, { "clip_ratio/high_max": 0.0018663075170479715, "clip_ratio/high_mean": 0.0007977694494911702, "clip_ratio/low_mean": 0.0006706421827402664, "clip_ratio/low_min": 5.3953120186633896e-05, "clip_ratio/region_mean": 0.0014684116322314367, "epoch": 0.08185312870051158, "grad_norm": 0.1420414000749588, "learning_rate": 2e-07, "loss": 0.0079, "step": 877 }, { "clip_ratio/high_max": 0.0018510253357817419, "clip_ratio/high_mean": 0.0007594293128931895, "clip_ratio/low_mean": 0.000539151182238129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012985804823983926, "epoch": 0.08194646180051217, "grad_norm": 0.13674955070018768, "learning_rate": 2e-07, "loss": -0.0191, "step": 878 }, { "clip_ratio/high_max": 0.0018398605025140569, "clip_ratio/high_mean": 0.0007376902549367514, "clip_ratio/low_mean": 0.0006626245349252713, "clip_ratio/low_min": 3.782463409152115e-05, "clip_ratio/region_mean": 0.0014003147844050545, "epoch": 0.08203979490051275, "grad_norm": 0.13474315404891968, "learning_rate": 2e-07, "loss": 0.0137, "step": 879 }, { "clip_ratio/high_max": 0.001908114572870545, "clip_ratio/high_mean": 0.0008513419215887552, "clip_ratio/low_mean": 0.0006196317171998089, "clip_ratio/low_min": 4.35024348917068e-05, "clip_ratio/region_mean": 0.0014709736096847337, "epoch": 0.08213312800051333, "grad_norm": 0.1517268717288971, "learning_rate": 2e-07, "loss": 0.0053, "step": 880 }, { "clip_ratio/high_max": 0.0018969643861055374, "clip_ratio/high_mean": 0.0007638317492819624, "clip_ratio/low_mean": 0.0006869660555821611, "clip_ratio/low_min": 8.024174894671887e-05, "clip_ratio/region_mean": 0.0014507977830362506, "epoch": 0.08222646110051392, "grad_norm": 0.1525135636329651, "learning_rate": 2e-07, "loss": 0.0244, "step": 881 }, { "clip_ratio/high_max": 0.0020202956366119906, "clip_ratio/high_mean": 0.000902481988305226, "clip_ratio/low_mean": 0.0005666356714755238, "clip_ratio/low_min": 2.2552351765625644e-05, "clip_ratio/region_mean": 0.001469117636588635, "epoch": 0.0823197942005145, "grad_norm": 0.14354459941387177, "learning_rate": 2e-07, "loss": -0.0433, "step": 882 }, { "clip_ratio/high_max": 0.0018588870407256763, "clip_ratio/high_mean": 0.0006928618195161107, "clip_ratio/low_mean": 0.0006357043785101268, "clip_ratio/low_min": 2.6517920559854247e-05, "clip_ratio/region_mean": 0.001328566202573711, "epoch": 0.08241312730051509, "grad_norm": 0.1518329679965973, "learning_rate": 2e-07, "loss": 0.0594, "step": 883 }, { "clip_ratio/high_max": 0.0021137019102752674, "clip_ratio/high_mean": 0.0007751684588583885, "clip_ratio/low_mean": 0.0006132919837682493, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013884604632039554, "epoch": 0.08250646040051567, "grad_norm": 0.13898968696594238, "learning_rate": 2e-07, "loss": 0.0177, "step": 884 }, { "clip_ratio/high_max": 0.001791020251403097, "clip_ratio/high_mean": 0.0007508067064918578, "clip_ratio/low_mean": 0.0007330464632104849, "clip_ratio/low_min": 4.32692831964232e-05, "clip_ratio/region_mean": 0.0014838531860732473, "epoch": 0.08259979350051624, "grad_norm": 0.15600574016571045, "learning_rate": 2e-07, "loss": 0.0355, "step": 885 }, { "clip_ratio/high_max": 0.0017020944360410795, "clip_ratio/high_mean": 0.0007278479934029747, "clip_ratio/low_mean": 0.0006418489738280186, "clip_ratio/low_min": 2.0341134131740546e-05, "clip_ratio/region_mean": 0.0013696969726879615, "epoch": 0.08269312660051684, "grad_norm": 0.12929055094718933, "learning_rate": 2e-07, "loss": 0.0119, "step": 886 }, { "clip_ratio/high_max": 0.0022671491096843965, "clip_ratio/high_mean": 0.0008537097673979588, "clip_ratio/low_mean": 0.0006784275210520718, "clip_ratio/low_min": 4.352334690338466e-05, "clip_ratio/region_mean": 0.0015321372702601366, "epoch": 0.08278645970051741, "grad_norm": 0.14595265686511993, "learning_rate": 2e-07, "loss": -0.0096, "step": 887 }, { "clip_ratio/high_max": 0.0017630117872613482, "clip_ratio/high_mean": 0.0007386234210571274, "clip_ratio/low_mean": 0.0006645461653533857, "clip_ratio/low_min": 2.484032665961422e-05, "clip_ratio/region_mean": 0.0014031695791345555, "epoch": 0.08287979280051799, "grad_norm": 0.1422441005706787, "learning_rate": 2e-07, "loss": 0.013, "step": 888 }, { "clip_ratio/high_max": 0.0019285959569970146, "clip_ratio/high_mean": 0.0008281491354864556, "clip_ratio/low_mean": 0.0007437372078129556, "clip_ratio/low_min": 5.040440555603709e-05, "clip_ratio/region_mean": 0.0015718863724032417, "epoch": 0.08297312590051859, "grad_norm": 0.15252764523029327, "learning_rate": 2e-07, "loss": 0.054, "step": 889 }, { "clip_ratio/high_max": 0.0020421528461156413, "clip_ratio/high_mean": 0.0007464885329682147, "clip_ratio/low_mean": 0.0008284678406198509, "clip_ratio/low_min": 0.00014415232089959318, "clip_ratio/region_mean": 0.001574956411786843, "epoch": 0.08306645900051916, "grad_norm": 0.15723568201065063, "learning_rate": 2e-07, "loss": 0.0743, "step": 890 }, { "clip_ratio/high_max": 0.001906704950670246, "clip_ratio/high_mean": 0.0007403245836030692, "clip_ratio/low_mean": 0.0005811370210722089, "clip_ratio/low_min": 3.818219556706026e-05, "clip_ratio/region_mean": 0.0013214616192271933, "epoch": 0.08315979210051976, "grad_norm": 0.14343716204166412, "learning_rate": 2e-07, "loss": 0.0262, "step": 891 }, { "clip_ratio/high_max": 0.0018963750953844283, "clip_ratio/high_mean": 0.0006949220623937435, "clip_ratio/low_mean": 0.0006278076334638172, "clip_ratio/low_min": 5.868816606380278e-05, "clip_ratio/region_mean": 0.0013227296913100872, "epoch": 0.08325312520052033, "grad_norm": 0.15836207568645477, "learning_rate": 2e-07, "loss": 0.0529, "step": 892 }, { "clip_ratio/high_max": 0.001993356410821434, "clip_ratio/high_mean": 0.0008299777800857555, "clip_ratio/low_mean": 0.0005853865186509211, "clip_ratio/low_min": 2.148838211724069e-05, "clip_ratio/region_mean": 0.0014153643060126342, "epoch": 0.08334645830052091, "grad_norm": 0.1637830287218094, "learning_rate": 2e-07, "loss": 0.0236, "step": 893 }, { "clip_ratio/high_max": 0.001752648033289006, "clip_ratio/high_mean": 0.0007815269127604552, "clip_ratio/low_mean": 0.0006834592313680332, "clip_ratio/low_min": 5.323157529346645e-05, "clip_ratio/region_mean": 0.00146498616959434, "epoch": 0.0834397914005215, "grad_norm": 0.15151366591453552, "learning_rate": 2e-07, "loss": -0.0119, "step": 894 }, { "clip_ratio/high_max": 0.001985197661269922, "clip_ratio/high_mean": 0.0008489346164424205, "clip_ratio/low_mean": 0.0007390038481389638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001587938462762395, "epoch": 0.08353312450052208, "grad_norm": 0.2500661611557007, "learning_rate": 2e-07, "loss": 0.0179, "step": 895 }, { "clip_ratio/high_max": 0.0020402373702381738, "clip_ratio/high_mean": 0.0007440393565047998, "clip_ratio/low_mean": 0.0007472659308405127, "clip_ratio/low_min": 1.4595982975151855e-05, "clip_ratio/region_mean": 0.0014913052691554185, "epoch": 0.08362645760052266, "grad_norm": 0.18095657229423523, "learning_rate": 2e-07, "loss": 0.0501, "step": 896 }, { "clip_ratio/high_max": 0.0015967284089128952, "clip_ratio/high_mean": 0.0005814898977405392, "clip_ratio/low_mean": 0.0005112015578561113, "clip_ratio/low_min": 1.4768431356060319e-05, "clip_ratio/region_mean": 0.0010926914346782723, "completions/clipped_ratio": 0.0156773158482143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 656.5914306640625, "completions/mean_terminated_length": 601.811767578125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.08371979070052325, "grad_norm": 0.13549181818962097, "learning_rate": 2e-07, "loss": 0.0215, "num_tokens": 675767717.0, "reward": 0.5926164984703064, "reward_std": 0.18299156427383423, "rewards/simpleverify_reward/mean": 0.5926164984703064, "rewards/simpleverify_reward/std": 0.491349458694458, "step": 897 }, { "clip_ratio/high_max": 0.001561397162731737, "clip_ratio/high_mean": 0.0006934684515726985, "clip_ratio/low_mean": 0.0005822423872814397, "clip_ratio/low_min": 1.4548417311743833e-05, "clip_ratio/region_mean": 0.0012757108597725164, "epoch": 0.08381312380052383, "grad_norm": 0.13175863027572632, "learning_rate": 2e-07, "loss": -0.0201, "step": 898 }, { "clip_ratio/high_max": 0.0015439948147104587, "clip_ratio/high_mean": 0.0005826326914757374, "clip_ratio/low_mean": 0.0005805851342302049, "clip_ratio/low_min": 1.5687750419601798e-05, "clip_ratio/region_mean": 0.0011632178320724051, "epoch": 0.08390645690052441, "grad_norm": 0.12933337688446045, "learning_rate": 2e-07, "loss": 0.0585, "step": 899 }, { "clip_ratio/high_max": 0.0017352116883557756, "clip_ratio/high_mean": 0.0005850824372828356, "clip_ratio/low_mean": 0.0006023151599947596, "clip_ratio/low_min": 6.37005514363409e-05, "clip_ratio/region_mean": 0.0011873975745402277, "epoch": 0.083999790000525, "grad_norm": 0.13575293123722076, "learning_rate": 2e-07, "loss": 0.0718, "step": 900 }, { "clip_ratio/high_max": 0.0015213125698210206, "clip_ratio/high_mean": 0.0005570867942878976, "clip_ratio/low_mean": 0.0005752322904299945, "clip_ratio/low_min": 3.7523769606195856e-05, "clip_ratio/region_mean": 0.0011323190919938497, "epoch": 0.08409312310052558, "grad_norm": 0.1411878764629364, "learning_rate": 2e-07, "loss": 0.0434, "step": 901 }, { "clip_ratio/high_max": 0.0017054592608474195, "clip_ratio/high_mean": 0.0006156637937237974, "clip_ratio/low_mean": 0.000586325616495742, "clip_ratio/low_min": 1.627604251552839e-05, "clip_ratio/region_mean": 0.0012019894347758964, "epoch": 0.08418645620052617, "grad_norm": 0.49078813195228577, "learning_rate": 2e-07, "loss": 0.0353, "step": 902 }, { "clip_ratio/high_max": 0.0015669461790821515, "clip_ratio/high_mean": 0.0005726688368667965, "clip_ratio/low_mean": 0.0005506213110493263, "clip_ratio/low_min": 3.536513486324111e-05, "clip_ratio/region_mean": 0.0011232901561015751, "epoch": 0.08427978930052675, "grad_norm": 0.13290376961231232, "learning_rate": 2e-07, "loss": 0.0641, "step": 903 }, { "clip_ratio/high_max": 0.0017148500992334448, "clip_ratio/high_mean": 0.0007217345191747881, "clip_ratio/low_mean": 0.0006534508639788328, "clip_ratio/low_min": 9.391899766342249e-05, "clip_ratio/region_mean": 0.0013751853948633652, "epoch": 0.08437312240052733, "grad_norm": 0.13851337134838104, "learning_rate": 2e-07, "loss": 0.015, "step": 904 }, { "clip_ratio/high_max": 0.0018928054450952914, "clip_ratio/high_mean": 0.0006379991118592443, "clip_ratio/low_mean": 0.0005193380475247977, "clip_ratio/low_min": 4.041206420879462e-05, "clip_ratio/region_mean": 0.0011573371302802116, "epoch": 0.08446645550052792, "grad_norm": 0.12437933683395386, "learning_rate": 2e-07, "loss": 0.0377, "step": 905 }, { "clip_ratio/high_max": 0.0018234359740745276, "clip_ratio/high_mean": 0.0007438947432092391, "clip_ratio/low_mean": 0.0006263666309678229, "clip_ratio/low_min": 7.627149534528144e-05, "clip_ratio/region_mean": 0.0013702613978239242, "epoch": 0.0845597886005285, "grad_norm": 0.15691876411437988, "learning_rate": 2e-07, "loss": 0.0225, "step": 906 }, { "clip_ratio/high_max": 0.0018110176170011982, "clip_ratio/high_mean": 0.00074298206527601, "clip_ratio/low_mean": 0.0005523205900317407, "clip_ratio/low_min": 5.725006531065446e-05, "clip_ratio/region_mean": 0.0012953026380273513, "epoch": 0.08465312170052908, "grad_norm": 0.14186254143714905, "learning_rate": 2e-07, "loss": -0.0018, "step": 907 }, { "clip_ratio/high_max": 0.001703721773083089, "clip_ratio/high_mean": 0.0007271396261785412, "clip_ratio/low_mean": 0.0005605312971965759, "clip_ratio/low_min": 3.665535768959671e-05, "clip_ratio/region_mean": 0.0012876709406555165, "epoch": 0.08474645480052967, "grad_norm": 0.14035288989543915, "learning_rate": 2e-07, "loss": 0.0427, "step": 908 }, { "clip_ratio/high_max": 0.002129588607203914, "clip_ratio/high_mean": 0.0007434429717250168, "clip_ratio/low_mean": 0.0005603019963018596, "clip_ratio/low_min": 1.1499540050863288e-05, "clip_ratio/region_mean": 0.0013037449643888976, "epoch": 0.08483978790053025, "grad_norm": 0.1578233242034912, "learning_rate": 2e-07, "loss": 0.0126, "step": 909 }, { "clip_ratio/high_max": 0.0014504656683129724, "clip_ratio/high_mean": 0.0006057084792701062, "clip_ratio/low_mean": 0.0005344997198335477, "clip_ratio/low_min": 1.5630205780325923e-05, "clip_ratio/region_mean": 0.0011402082127460744, "epoch": 0.08493312100053083, "grad_norm": 0.1403988003730774, "learning_rate": 2e-07, "loss": 0.0395, "step": 910 }, { "clip_ratio/high_max": 0.0021517268651223276, "clip_ratio/high_mean": 0.0008307988973683678, "clip_ratio/low_mean": 0.00044699527188640786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012777941628883127, "epoch": 0.08502645410053142, "grad_norm": 0.1737595647573471, "learning_rate": 2e-07, "loss": -0.0401, "step": 911 }, { "clip_ratio/high_max": 0.0014656700823252322, "clip_ratio/high_mean": 0.000561972681680345, "clip_ratio/low_mean": 0.0005172374958419823, "clip_ratio/low_min": 2.4955081244115718e-05, "clip_ratio/region_mean": 0.0010792101893457584, "epoch": 0.085119787200532, "grad_norm": 0.15598918497562408, "learning_rate": 2e-07, "loss": 0.0624, "step": 912 }, { "clip_ratio/high_max": 0.0016532461559108924, "clip_ratio/high_mean": 0.000719524068699684, "clip_ratio/low_mean": 0.0006259554556891089, "clip_ratio/low_min": 2.483536627551075e-05, "clip_ratio/region_mean": 0.001345479511655867, "epoch": 0.08521312030053259, "grad_norm": 0.1374732404947281, "learning_rate": 2e-07, "loss": 0.0207, "step": 913 }, { "clip_ratio/high_max": 0.0017545416776556522, "clip_ratio/high_mean": 0.0006950801780476468, "clip_ratio/low_mean": 0.000522357374393323, "clip_ratio/low_min": 3.0222437999327667e-05, "clip_ratio/region_mean": 0.0012174375478934962, "epoch": 0.08530645340053317, "grad_norm": 0.1726432889699936, "learning_rate": 2e-07, "loss": 0.0266, "step": 914 }, { "clip_ratio/high_max": 0.0015062741804285906, "clip_ratio/high_mean": 0.000599268561018107, "clip_ratio/low_mean": 0.0006061369331291644, "clip_ratio/low_min": 8.265898941317573e-05, "clip_ratio/region_mean": 0.001205405485961819, "epoch": 0.08539978650053374, "grad_norm": 0.14759540557861328, "learning_rate": 2e-07, "loss": 0.0693, "step": 915 }, { "clip_ratio/high_max": 0.0016175219025171828, "clip_ratio/high_mean": 0.0006114437546784757, "clip_ratio/low_mean": 0.0006954297114134533, "clip_ratio/low_min": 7.193934288807213e-05, "clip_ratio/region_mean": 0.0013068734624539502, "epoch": 0.08549311960053434, "grad_norm": 0.12956063449382782, "learning_rate": 2e-07, "loss": 0.0369, "step": 916 }, { "clip_ratio/high_max": 0.0017409465108357836, "clip_ratio/high_mean": 0.0006867565411994292, "clip_ratio/low_mean": 0.00053917412787996, "clip_ratio/low_min": 5.930920133323525e-05, "clip_ratio/region_mean": 0.00122593068954302, "epoch": 0.08558645270053492, "grad_norm": 0.13957950472831726, "learning_rate": 2e-07, "loss": -0.0067, "step": 917 }, { "clip_ratio/high_max": 0.0015139422357606236, "clip_ratio/high_mean": 0.0005297754378261743, "clip_ratio/low_mean": 0.0006165146146486222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001146290062024491, "epoch": 0.0856797858005355, "grad_norm": 0.13851527869701385, "learning_rate": 2e-07, "loss": 0.0616, "step": 918 }, { "clip_ratio/high_max": 0.0019366443011676893, "clip_ratio/high_mean": 0.000681002318742685, "clip_ratio/low_mean": 0.0005957832890999271, "clip_ratio/low_min": 2.7754431357607245e-05, "clip_ratio/region_mean": 0.001276785580557771, "epoch": 0.08577311890053609, "grad_norm": 0.13460753858089447, "learning_rate": 2e-07, "loss": 0.0231, "step": 919 }, { "clip_ratio/high_max": 0.0016042291608755477, "clip_ratio/high_mean": 0.0006273482495089411, "clip_ratio/low_mean": 0.0006225861143320799, "clip_ratio/low_min": 4.503178297454724e-05, "clip_ratio/region_mean": 0.0012499343538365792, "epoch": 0.08586645200053666, "grad_norm": 0.14184603095054626, "learning_rate": 2e-07, "loss": 0.0489, "step": 920 }, { "clip_ratio/high_max": 0.001942801729455823, "clip_ratio/high_mean": 0.0007226038778753718, "clip_ratio/low_mean": 0.0006403596162272152, "clip_ratio/low_min": 3.048480903089512e-05, "clip_ratio/region_mean": 0.00136296348500764, "epoch": 0.08595978510053724, "grad_norm": 0.15107688307762146, "learning_rate": 2e-07, "loss": 0.0196, "step": 921 }, { "clip_ratio/high_max": 0.0020201020161039196, "clip_ratio/high_mean": 0.000785386486313655, "clip_ratio/low_mean": 0.0005606232361969887, "clip_ratio/low_min": 3.248440771130845e-05, "clip_ratio/region_mean": 0.0013460097361530643, "epoch": 0.08605311820053783, "grad_norm": 0.14862816035747528, "learning_rate": 2e-07, "loss": -0.0069, "step": 922 }, { "clip_ratio/high_max": 0.001336767956672702, "clip_ratio/high_mean": 0.0005826861670357175, "clip_ratio/low_mean": 0.0006115518899605377, "clip_ratio/low_min": 3.524709609337151e-05, "clip_ratio/region_mean": 0.001194238069729181, "epoch": 0.08614645130053841, "grad_norm": 0.1520920693874359, "learning_rate": 2e-07, "loss": 0.0586, "step": 923 }, { "clip_ratio/high_max": 0.0017420677686459385, "clip_ratio/high_mean": 0.0006739973923686193, "clip_ratio/low_mean": 0.0006116251734056277, "clip_ratio/low_min": 0.00010688203474273905, "clip_ratio/region_mean": 0.001285622583964141, "epoch": 0.086239784400539, "grad_norm": 0.1415053904056549, "learning_rate": 2e-07, "loss": 0.0079, "step": 924 }, { "clip_ratio/high_max": 0.0019110435059701558, "clip_ratio/high_mean": 0.0007354313183896011, "clip_ratio/low_mean": 0.000587212964092032, "clip_ratio/low_min": 2.368580180700519e-05, "clip_ratio/region_mean": 0.0013226442824816331, "epoch": 0.08633311750053958, "grad_norm": 0.1487334817647934, "learning_rate": 2e-07, "loss": 0.0482, "step": 925 }, { "clip_ratio/high_max": 0.0017633367679081857, "clip_ratio/high_mean": 0.0007152704256441211, "clip_ratio/low_mean": 0.0006217048030521255, "clip_ratio/low_min": 1.1040452591259964e-05, "clip_ratio/region_mean": 0.0013369752196012996, "epoch": 0.08642645060054016, "grad_norm": 0.14829230308532715, "learning_rate": 2e-07, "loss": -0.006, "step": 926 }, { "clip_ratio/high_max": 0.001627738049137406, "clip_ratio/high_mean": 0.0006572049569513183, "clip_ratio/low_mean": 0.000583134360567783, "clip_ratio/low_min": 2.636748286022339e-05, "clip_ratio/region_mean": 0.0012403393193380907, "epoch": 0.08651978370054075, "grad_norm": 0.15884599089622498, "learning_rate": 2e-07, "loss": 0.0419, "step": 927 }, { "clip_ratio/high_max": 0.0018461713116266765, "clip_ratio/high_mean": 0.0007027570391073823, "clip_ratio/low_mean": 0.0005533672092497, "clip_ratio/low_min": 1.4204545550455805e-05, "clip_ratio/region_mean": 0.0012561242692754604, "epoch": 0.08661311680054133, "grad_norm": 0.148577019572258, "learning_rate": 2e-07, "loss": 0.0203, "step": 928 }, { "clip_ratio/high_max": 0.0017364769119012635, "clip_ratio/high_mean": 0.0006874520076962654, "clip_ratio/low_mean": 0.0006706906260660617, "clip_ratio/low_min": 1.5424482626258396e-05, "clip_ratio/region_mean": 0.0013581426392192952, "epoch": 0.08670644990054191, "grad_norm": 0.14356404542922974, "learning_rate": 2e-07, "loss": 0.0074, "step": 929 }, { "clip_ratio/high_max": 0.0018763012012641411, "clip_ratio/high_mean": 0.0006698812630929751, "clip_ratio/low_mean": 0.0006113684539741371, "clip_ratio/low_min": 4.465650363272289e-05, "clip_ratio/region_mean": 0.0012812497334380168, "epoch": 0.0867997830005425, "grad_norm": 0.13560567796230316, "learning_rate": 2e-07, "loss": 0.0246, "step": 930 }, { "clip_ratio/high_max": 0.0016894715663511306, "clip_ratio/high_mean": 0.0006424582534236833, "clip_ratio/low_mean": 0.0005979287079753703, "clip_ratio/low_min": 4.0012211684370413e-05, "clip_ratio/region_mean": 0.001240386969584506, "epoch": 0.08689311610054308, "grad_norm": 0.1492682844400406, "learning_rate": 2e-07, "loss": 0.057, "step": 931 }, { "clip_ratio/high_max": 0.0016340763722837437, "clip_ratio/high_mean": 0.0006762046014046064, "clip_ratio/low_mean": 0.0005550609121200978, "clip_ratio/low_min": 3.698825275932904e-05, "clip_ratio/region_mean": 0.0012312655126152094, "epoch": 0.08698644920054366, "grad_norm": 0.14825674891471863, "learning_rate": 2e-07, "loss": 0.0285, "step": 932 }, { "clip_ratio/high_max": 0.0018011686115642078, "clip_ratio/high_mean": 0.0006909989351697732, "clip_ratio/low_mean": 0.0006165472168504493, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013075461465632543, "epoch": 0.08707978230054425, "grad_norm": 0.13232611119747162, "learning_rate": 2e-07, "loss": 0.0085, "step": 933 }, { "clip_ratio/high_max": 0.0017766154232958797, "clip_ratio/high_mean": 0.0007537573073932435, "clip_ratio/low_mean": 0.0005564579169003991, "clip_ratio/low_min": 3.840309182123747e-05, "clip_ratio/region_mean": 0.001310215226112632, "epoch": 0.08717311540054483, "grad_norm": 0.15140919387340546, "learning_rate": 2e-07, "loss": -0.0191, "step": 934 }, { "clip_ratio/high_max": 0.0015247852716129273, "clip_ratio/high_mean": 0.0005935785629844759, "clip_ratio/low_mean": 0.0006426573763747001, "clip_ratio/low_min": 1.0676460988179315e-05, "clip_ratio/region_mean": 0.0012362359411781654, "epoch": 0.08726644850054542, "grad_norm": 0.13850785791873932, "learning_rate": 2e-07, "loss": 0.0525, "step": 935 }, { "clip_ratio/high_max": 0.001841538236476481, "clip_ratio/high_mean": 0.0006383438649208983, "clip_ratio/low_mean": 0.0006129048979346408, "clip_ratio/low_min": 3.213452964700991e-05, "clip_ratio/region_mean": 0.0012512487919593696, "epoch": 0.087359781600546, "grad_norm": 0.40165892243385315, "learning_rate": 2e-07, "loss": 0.0563, "step": 936 }, { "clip_ratio/high_max": 0.0017364050072501414, "clip_ratio/high_mean": 0.0007075999419612344, "clip_ratio/low_mean": 0.0006564254217664711, "clip_ratio/low_min": 1.262116347788833e-05, "clip_ratio/region_mean": 0.0013640253637277056, "epoch": 0.08745311470054658, "grad_norm": 0.14047060906887054, "learning_rate": 2e-07, "loss": 0.0173, "step": 937 }, { "clip_ratio/high_max": 0.0014446389286604244, "clip_ratio/high_mean": 0.0005970032543700654, "clip_ratio/low_mean": 0.0005993042232148582, "clip_ratio/low_min": 8.530094419256784e-06, "clip_ratio/region_mean": 0.0011963074684899766, "epoch": 0.08754644780054717, "grad_norm": 0.13327822089195251, "learning_rate": 2e-07, "loss": 0.0001, "step": 938 }, { "clip_ratio/high_max": 0.0017155137720692437, "clip_ratio/high_mean": 0.0007171968845796073, "clip_ratio/low_mean": 0.0006034867337803007, "clip_ratio/low_min": 3.735063910426106e-05, "clip_ratio/region_mean": 0.0013206836229073815, "epoch": 0.08763978090054775, "grad_norm": 0.14642290771007538, "learning_rate": 2e-07, "loss": 0.0222, "step": 939 }, { "clip_ratio/high_max": 0.0018498821045795921, "clip_ratio/high_mean": 0.0006654855114902603, "clip_ratio/low_mean": 0.0006289547391133965, "clip_ratio/low_min": 2.3649324248253834e-05, "clip_ratio/region_mean": 0.0012944402733410243, "epoch": 0.08773311400054833, "grad_norm": 0.15709646046161652, "learning_rate": 2e-07, "loss": 0.0301, "step": 940 }, { "clip_ratio/high_max": 0.0018532064714236185, "clip_ratio/high_mean": 0.0008154071911121719, "clip_ratio/low_mean": 0.0006354513379847049, "clip_ratio/low_min": 3.778658174269367e-05, "clip_ratio/region_mean": 0.0014508585336443502, "epoch": 0.08782644710054892, "grad_norm": 0.14968709647655487, "learning_rate": 2e-07, "loss": 0.0133, "step": 941 }, { "clip_ratio/high_max": 0.001614033906662371, "clip_ratio/high_mean": 0.0006580384579137899, "clip_ratio/low_mean": 0.0006329300194920506, "clip_ratio/low_min": 6.168903746583965e-05, "clip_ratio/region_mean": 0.001290968488319777, "epoch": 0.0879197802005495, "grad_norm": 0.1360132247209549, "learning_rate": 2e-07, "loss": 0.0583, "step": 942 }, { "clip_ratio/high_max": 0.0014058155975362752, "clip_ratio/high_mean": 0.0005821979393658694, "clip_ratio/low_mean": 0.0005943457090324955, "clip_ratio/low_min": 2.31089234148385e-05, "clip_ratio/region_mean": 0.0011765436684072483, "epoch": 0.08801311330055009, "grad_norm": 0.144170343875885, "learning_rate": 2e-07, "loss": 0.0568, "step": 943 }, { "clip_ratio/high_max": 0.0019847943767672405, "clip_ratio/high_mean": 0.0007670282193430467, "clip_ratio/low_mean": 0.0005996203517497634, "clip_ratio/low_min": 5.718206739402376e-05, "clip_ratio/region_mean": 0.00136664858655422, "epoch": 0.08810644640055067, "grad_norm": 0.1375730335712433, "learning_rate": 2e-07, "loss": -0.0014, "step": 944 }, { "clip_ratio/high_max": 0.0017257658546441235, "clip_ratio/high_mean": 0.0006997886466706404, "clip_ratio/low_mean": 0.0006797026635467773, "clip_ratio/low_min": 1.1316313248244114e-05, "clip_ratio/region_mean": 0.0013794913284073118, "epoch": 0.08819977950055125, "grad_norm": 0.15689490735530853, "learning_rate": 2e-07, "loss": 0.0053, "step": 945 }, { "clip_ratio/high_max": 0.0018375839390500914, "clip_ratio/high_mean": 0.000675853121720138, "clip_ratio/low_mean": 0.0005929541903242352, "clip_ratio/low_min": 1.6348418284906074e-05, "clip_ratio/region_mean": 0.0012688073184108362, "epoch": 0.08829311260055184, "grad_norm": 0.1540026068687439, "learning_rate": 2e-07, "loss": 0.0408, "step": 946 }, { "clip_ratio/high_max": 0.0017043523512256797, "clip_ratio/high_mean": 0.0006297641994024161, "clip_ratio/low_mean": 0.000565444415769889, "clip_ratio/low_min": 9.851828508544713e-06, "clip_ratio/region_mean": 0.001195208591525443, "epoch": 0.08838644570055242, "grad_norm": 0.1712838113307953, "learning_rate": 2e-07, "loss": 0.0378, "step": 947 }, { "clip_ratio/high_max": 0.002181917781854281, "clip_ratio/high_mean": 0.0008172517746061203, "clip_ratio/low_mean": 0.0006702996561216423, "clip_ratio/low_min": 8.762372362980386e-05, "clip_ratio/region_mean": 0.0014875514170853421, "epoch": 0.088479778800553, "grad_norm": 0.14389191567897797, "learning_rate": 2e-07, "loss": -0.0139, "step": 948 }, { "clip_ratio/high_max": 0.0016152310636243783, "clip_ratio/high_mean": 0.0006794294713472482, "clip_ratio/low_mean": 0.0006726663141307654, "clip_ratio/low_min": 5.0409030336595606e-05, "clip_ratio/region_mean": 0.0013520957800210454, "epoch": 0.08857311190055359, "grad_norm": 0.15391457080841064, "learning_rate": 2e-07, "loss": 0.0575, "step": 949 }, { "clip_ratio/high_max": 0.0014913881677784957, "clip_ratio/high_mean": 0.0006032635674273479, "clip_ratio/low_mean": 0.0006028328989486909, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012060964509146288, "epoch": 0.08866644500055416, "grad_norm": 0.155614972114563, "learning_rate": 2e-07, "loss": 0.0718, "step": 950 }, { "clip_ratio/high_max": 0.001612062729691388, "clip_ratio/high_mean": 0.0006614865287701832, "clip_ratio/low_mean": 0.0005185445534152677, "clip_ratio/low_min": 9.370314728585072e-06, "clip_ratio/region_mean": 0.001180031082185451, "epoch": 0.08875977810055474, "grad_norm": 0.1615762710571289, "learning_rate": 2e-07, "loss": 0.0624, "step": 951 }, { "clip_ratio/high_max": 0.0019262257046648301, "clip_ratio/high_mean": 0.0008171762674464844, "clip_ratio/low_mean": 0.0006965325646888232, "clip_ratio/low_min": 3.5699980799108744e-05, "clip_ratio/region_mean": 0.0015137088412302546, "epoch": 0.08885311120055533, "grad_norm": 0.14786235988140106, "learning_rate": 2e-07, "loss": 0.0195, "step": 952 }, { "clip_ratio/high_max": 0.001610243140021339, "clip_ratio/high_mean": 0.0006459336646003067, "clip_ratio/low_mean": 0.0006756033235433279, "clip_ratio/low_min": 5.798095298814587e-05, "clip_ratio/region_mean": 0.0013215369908721186, "epoch": 0.08894644430055591, "grad_norm": 0.15719041228294373, "learning_rate": 2e-07, "loss": 0.0422, "step": 953 }, { "clip_ratio/high_max": 0.0016098885935207363, "clip_ratio/high_mean": 0.0006598408817808377, "clip_ratio/low_mean": 0.0005663799929607194, "clip_ratio/low_min": 2.0966119336662814e-05, "clip_ratio/region_mean": 0.0012262208583706524, "epoch": 0.0890397774005565, "grad_norm": 0.1461481899023056, "learning_rate": 2e-07, "loss": 0.0566, "step": 954 }, { "clip_ratio/high_max": 0.0018223091537947766, "clip_ratio/high_mean": 0.0007399610276479507, "clip_ratio/low_mean": 0.0006216908459464321, "clip_ratio/low_min": 5.9917666931141866e-05, "clip_ratio/region_mean": 0.0013616518990602344, "epoch": 0.08913311050055708, "grad_norm": 0.14125525951385498, "learning_rate": 2e-07, "loss": 0.0134, "step": 955 }, { "clip_ratio/high_max": 0.0016872727501322515, "clip_ratio/high_mean": 0.0006801241656830825, "clip_ratio/low_mean": 0.0007370338880718919, "clip_ratio/low_min": 5.667260666086804e-05, "clip_ratio/region_mean": 0.001417158084223047, "epoch": 0.08922644360055766, "grad_norm": 0.14837798476219177, "learning_rate": 2e-07, "loss": 0.0236, "step": 956 }, { "clip_ratio/high_max": 0.002018110408243956, "clip_ratio/high_mean": 0.00075513698220675, "clip_ratio/low_mean": 0.0006421748475986533, "clip_ratio/low_min": 5.32309350091964e-05, "clip_ratio/region_mean": 0.0013973118329886347, "epoch": 0.08931977670055825, "grad_norm": 0.1685246229171753, "learning_rate": 2e-07, "loss": 0.0322, "step": 957 }, { "clip_ratio/high_max": 0.0017048562804120593, "clip_ratio/high_mean": 0.0007253068815771258, "clip_ratio/low_mean": 0.0006515228888019919, "clip_ratio/low_min": 5.554386461881222e-05, "clip_ratio/region_mean": 0.0013768297831120435, "epoch": 0.08941310980055883, "grad_norm": 0.15923207998275757, "learning_rate": 2e-07, "loss": 0.0435, "step": 958 }, { "clip_ratio/high_max": 0.0016913980143726803, "clip_ratio/high_mean": 0.0006519774433400016, "clip_ratio/low_mean": 0.00058609993175196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012380773605400464, "epoch": 0.08950644290055941, "grad_norm": 0.1433023363351822, "learning_rate": 2e-07, "loss": 0.0412, "step": 959 }, { "clip_ratio/high_max": 0.00195053845163784, "clip_ratio/high_mean": 0.000752691155867069, "clip_ratio/low_mean": 0.0006733030058967415, "clip_ratio/low_min": 1.896813409985043e-05, "clip_ratio/region_mean": 0.0014259941781347152, "epoch": 0.08959977600056, "grad_norm": 0.17645296454429626, "learning_rate": 2e-07, "loss": 0.0259, "step": 960 }, { "clip_ratio/high_max": 0.0017333344658254646, "clip_ratio/high_mean": 0.0007236578121592174, "clip_ratio/low_mean": 0.0007232475527416682, "clip_ratio/low_min": 4.7980017370719e-05, "clip_ratio/region_mean": 0.001446905360353412, "epoch": 0.08969310910056058, "grad_norm": 0.14295274019241333, "learning_rate": 2e-07, "loss": 0.03, "step": 961 }, { "clip_ratio/high_max": 0.001791419112123549, "clip_ratio/high_mean": 0.0007514136323152343, "clip_ratio/low_mean": 0.0007780562318657758, "clip_ratio/low_min": 3.87216769013321e-05, "clip_ratio/region_mean": 0.0015294698387151584, "epoch": 0.08978644220056116, "grad_norm": 0.14367815852165222, "learning_rate": 2e-07, "loss": 0.0282, "step": 962 }, { "clip_ratio/high_max": 0.0016306404977513012, "clip_ratio/high_mean": 0.0006500556573882932, "clip_ratio/low_mean": 0.0006211377840372734, "clip_ratio/low_min": 2.5136391741398256e-05, "clip_ratio/region_mean": 0.0012711934286926407, "epoch": 0.08987977530056175, "grad_norm": 1.6643041372299194, "learning_rate": 2e-07, "loss": 0.0363, "step": 963 }, { "clip_ratio/high_max": 0.0017155869318230543, "clip_ratio/high_mean": 0.0007564597945020068, "clip_ratio/low_mean": 0.0005712039474019548, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001327663761912845, "epoch": 0.08997310840056233, "grad_norm": 0.15755465626716614, "learning_rate": 2e-07, "loss": 0.0056, "step": 964 }, { "clip_ratio/high_max": 0.001731225576804718, "clip_ratio/high_mean": 0.0006712662543577608, "clip_ratio/low_mean": 0.0006580202807526803, "clip_ratio/low_min": 6.380548438755795e-05, "clip_ratio/region_mean": 0.0013292865223775152, "epoch": 0.09006644150056292, "grad_norm": 0.1681397706270218, "learning_rate": 2e-07, "loss": 0.0549, "step": 965 }, { "clip_ratio/high_max": 0.001717697818094166, "clip_ratio/high_mean": 0.000690625671268208, "clip_ratio/low_mean": 0.0006659329774265643, "clip_ratio/low_min": 9.188556759909261e-05, "clip_ratio/region_mean": 0.0013565586268668994, "epoch": 0.0901597746005635, "grad_norm": 0.22851037979125977, "learning_rate": 2e-07, "loss": 0.0239, "step": 966 }, { "clip_ratio/high_max": 0.0018179745893576182, "clip_ratio/high_mean": 0.0007173036992753623, "clip_ratio/low_mean": 0.000673287051540683, "clip_ratio/low_min": 4.505121614784002e-05, "clip_ratio/region_mean": 0.0013905907835578546, "epoch": 0.09025310770056408, "grad_norm": 0.1608322411775589, "learning_rate": 2e-07, "loss": 0.025, "step": 967 }, { "clip_ratio/high_max": 0.0018123205118172336, "clip_ratio/high_mean": 0.0007438417105731787, "clip_ratio/low_mean": 0.0006748748455720488, "clip_ratio/low_min": 4.9847900754684815e-05, "clip_ratio/region_mean": 0.0014187165506882593, "epoch": 0.09034644080056467, "grad_norm": 0.16142162680625916, "learning_rate": 2e-07, "loss": 0.023, "step": 968 }, { "clip_ratio/high_max": 0.0018675397732295096, "clip_ratio/high_mean": 0.0008375347533728927, "clip_ratio/low_mean": 0.0006306738359853625, "clip_ratio/low_min": 5.7487067806505365e-05, "clip_ratio/region_mean": 0.0014682086257380433, "epoch": 0.09043977390056525, "grad_norm": 0.1536380797624588, "learning_rate": 2e-07, "loss": -0.0093, "step": 969 }, { "clip_ratio/high_max": 0.002178411694330862, "clip_ratio/high_mean": 0.0007840615362511016, "clip_ratio/low_mean": 0.0005862502966920147, "clip_ratio/low_min": 4.741355496662436e-05, "clip_ratio/region_mean": 0.0013703118347621057, "epoch": 0.09053310700056583, "grad_norm": 0.1530255675315857, "learning_rate": 2e-07, "loss": 0.0087, "step": 970 }, { "clip_ratio/high_max": 0.0015990673746273387, "clip_ratio/high_mean": 0.000661393032714841, "clip_ratio/low_mean": 0.0007368735641648527, "clip_ratio/low_min": 7.986077889654553e-05, "clip_ratio/region_mean": 0.001398266620526556, "epoch": 0.09062644010056642, "grad_norm": 0.24544879794120789, "learning_rate": 2e-07, "loss": 0.0474, "step": 971 }, { "clip_ratio/high_max": 0.002109720906446455, "clip_ratio/high_mean": 0.0008268377505373792, "clip_ratio/low_mean": 0.0006866824141980032, "clip_ratio/low_min": 8.871440695656929e-05, "clip_ratio/region_mean": 0.0015135201974771917, "epoch": 0.090719773200567, "grad_norm": 0.18482695519924164, "learning_rate": 2e-07, "loss": -0.0128, "step": 972 }, { "clip_ratio/high_max": 0.0018456605030223727, "clip_ratio/high_mean": 0.0007896169390733121, "clip_ratio/low_mean": 0.0005981198628433049, "clip_ratio/low_min": 2.581577791715972e-05, "clip_ratio/region_mean": 0.0013877368401153944, "epoch": 0.09081310630056758, "grad_norm": 0.1758248209953308, "learning_rate": 2e-07, "loss": -0.0153, "step": 973 }, { "clip_ratio/high_max": 0.002118102536769584, "clip_ratio/high_mean": 0.0007917348539194791, "clip_ratio/low_mean": 0.0007140970428736182, "clip_ratio/low_min": 8.483541705572861e-05, "clip_ratio/region_mean": 0.0015058318895171396, "epoch": 0.09090643940056817, "grad_norm": 0.1552230417728424, "learning_rate": 2e-07, "loss": 0.0516, "step": 974 }, { "clip_ratio/high_max": 0.0017773111467249691, "clip_ratio/high_mean": 0.000749592878491967, "clip_ratio/low_mean": 0.0006149510190880392, "clip_ratio/low_min": 9.192528523271903e-06, "clip_ratio/region_mean": 0.0013645438884850591, "epoch": 0.09099977250056875, "grad_norm": 0.14404024183750153, "learning_rate": 2e-07, "loss": 0.0324, "step": 975 }, { "clip_ratio/high_max": 0.0019166903948644176, "clip_ratio/high_mean": 0.0008207044575101463, "clip_ratio/low_mean": 0.0006006146222716779, "clip_ratio/low_min": 3.535164341883501e-05, "clip_ratio/region_mean": 0.0014213190734153613, "epoch": 0.09109310560056934, "grad_norm": 0.15472853183746338, "learning_rate": 2e-07, "loss": -0.0117, "step": 976 }, { "clip_ratio/high_max": 0.0019044246728299186, "clip_ratio/high_mean": 0.0007340698757616337, "clip_ratio/low_mean": 0.0005961064734947286, "clip_ratio/low_min": 6.175304224598221e-05, "clip_ratio/region_mean": 0.0013301763756317087, "epoch": 0.09118643870056992, "grad_norm": 0.14880219101905823, "learning_rate": 2e-07, "loss": 0.0318, "step": 977 }, { "clip_ratio/high_max": 0.0018744719527603593, "clip_ratio/high_mean": 0.0007697995133639779, "clip_ratio/low_mean": 0.0006657738267676905, "clip_ratio/low_min": 5.0981372623937204e-05, "clip_ratio/region_mean": 0.0014355733255797531, "epoch": 0.0912797718005705, "grad_norm": 0.1656811535358429, "learning_rate": 2e-07, "loss": 0.0674, "step": 978 }, { "clip_ratio/high_max": 0.0016899645197554491, "clip_ratio/high_mean": 0.0007351214553636964, "clip_ratio/low_mean": 0.000683677448250819, "clip_ratio/low_min": 4.567634550767252e-05, "clip_ratio/region_mean": 0.0014187989218044095, "epoch": 0.09137310490057109, "grad_norm": 0.14926370978355408, "learning_rate": 2e-07, "loss": 0.0221, "step": 979 }, { "clip_ratio/high_max": 0.0017840222681115847, "clip_ratio/high_mean": 0.0007128581091819797, "clip_ratio/low_mean": 0.000630498865575646, "clip_ratio/low_min": 4.008272935607238e-05, "clip_ratio/region_mean": 0.0013433570056804456, "epoch": 0.09146643800057166, "grad_norm": 0.19268134236335754, "learning_rate": 2e-07, "loss": 0.0213, "step": 980 }, { "clip_ratio/high_max": 0.002111333727953024, "clip_ratio/high_mean": 0.000782511975558009, "clip_ratio/low_mean": 0.0007024190308584366, "clip_ratio/low_min": 1.5723269825684838e-05, "clip_ratio/region_mean": 0.0014849309955025092, "epoch": 0.09155977110057224, "grad_norm": 28.13702964782715, "learning_rate": 2e-07, "loss": 0.0576, "step": 981 }, { "clip_ratio/high_max": 0.0022352999440045096, "clip_ratio/high_mean": 0.0008797888294793665, "clip_ratio/low_mean": 0.0006075310084270313, "clip_ratio/low_min": 1.3507672520063352e-05, "clip_ratio/region_mean": 0.001487319816078525, "epoch": 0.09165310420057284, "grad_norm": 0.1435876339673996, "learning_rate": 2e-07, "loss": -0.0289, "step": 982 }, { "clip_ratio/high_max": 0.0019384174447623082, "clip_ratio/high_mean": 0.0007688148034503683, "clip_ratio/low_mean": 0.0006346751433738973, "clip_ratio/low_min": 1.4269406165112741e-05, "clip_ratio/region_mean": 0.0014034899431862868, "epoch": 0.09174643730057341, "grad_norm": 0.18740825355052948, "learning_rate": 2e-07, "loss": 0.0169, "step": 983 }, { "clip_ratio/high_max": 0.0017731858606566675, "clip_ratio/high_mean": 0.0006949472763153608, "clip_ratio/low_mean": 0.0005882402692805044, "clip_ratio/low_min": 3.688629385578679e-05, "clip_ratio/region_mean": 0.0012831875501433387, "epoch": 0.09183977040057399, "grad_norm": 0.1613827645778656, "learning_rate": 2e-07, "loss": 0.0085, "step": 984 }, { "clip_ratio/high_max": 0.0020426520059118047, "clip_ratio/high_mean": 0.0008113142121146666, "clip_ratio/low_mean": 0.0006762327111573541, "clip_ratio/low_min": 3.87813352062949e-05, "clip_ratio/region_mean": 0.0014875469387334306, "epoch": 0.09193310350057458, "grad_norm": 0.165876105427742, "learning_rate": 2e-07, "loss": 0.0427, "step": 985 }, { "clip_ratio/high_max": 0.002096603697282262, "clip_ratio/high_mean": 0.0008325785547640407, "clip_ratio/low_mean": 0.0006260246409510728, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014586031684302725, "epoch": 0.09202643660057516, "grad_norm": 0.1517435759305954, "learning_rate": 2e-07, "loss": 0.0032, "step": 986 }, { "clip_ratio/high_max": 0.001742502488923492, "clip_ratio/high_mean": 0.0006964906460780185, "clip_ratio/low_mean": 0.0006305426832113881, "clip_ratio/low_min": 3.177606595272664e-05, "clip_ratio/region_mean": 0.001327033340203343, "epoch": 0.09211976970057575, "grad_norm": 0.16995000839233398, "learning_rate": 2e-07, "loss": 0.0204, "step": 987 }, { "clip_ratio/high_max": 0.0018151459298678674, "clip_ratio/high_mean": 0.0007264230598593713, "clip_ratio/low_mean": 0.0007662876778340433, "clip_ratio/low_min": 5.200099622015841e-06, "clip_ratio/region_mean": 0.0014927107258699834, "epoch": 0.09221310280057633, "grad_norm": 0.1759267896413803, "learning_rate": 2e-07, "loss": 0.0761, "step": 988 }, { "clip_ratio/high_max": 0.0018290148946107365, "clip_ratio/high_mean": 0.000724421381164575, "clip_ratio/low_mean": 0.0007785979050822789, "clip_ratio/low_min": 9.506228798272787e-05, "clip_ratio/region_mean": 0.0015030193208076525, "epoch": 0.09230643590057691, "grad_norm": 0.17499110102653503, "learning_rate": 2e-07, "loss": 0.0818, "step": 989 }, { "clip_ratio/high_max": 0.0019003119887202047, "clip_ratio/high_mean": 0.0007314792746910825, "clip_ratio/low_mean": 0.0006544136304000858, "clip_ratio/low_min": 2.379772013227921e-05, "clip_ratio/region_mean": 0.0013858929160051048, "epoch": 0.0923997690005775, "grad_norm": 0.1619679480791092, "learning_rate": 2e-07, "loss": 0.0232, "step": 990 }, { "clip_ratio/high_max": 0.0018052139894280117, "clip_ratio/high_mean": 0.0007053463323245523, "clip_ratio/low_mean": 0.000662964037474012, "clip_ratio/low_min": 3.8276586565189064e-05, "clip_ratio/region_mean": 0.0013683103970834054, "epoch": 0.09249310210057808, "grad_norm": 0.17500336468219757, "learning_rate": 2e-07, "loss": 0.0354, "step": 991 }, { "clip_ratio/high_max": 0.0020604409037332516, "clip_ratio/high_mean": 0.0007385460885416251, "clip_ratio/low_mean": 0.0007158915032050572, "clip_ratio/low_min": 7.487781113013625e-05, "clip_ratio/region_mean": 0.0014544375844707247, "epoch": 0.09258643520057866, "grad_norm": 1.2302303314208984, "learning_rate": 2e-07, "loss": 0.0624, "step": 992 }, { "clip_ratio/high_max": 0.0020206824847264215, "clip_ratio/high_mean": 0.0007982680399436504, "clip_ratio/low_mean": 0.0007925058853288647, "clip_ratio/low_min": 0.00013087082334095612, "clip_ratio/region_mean": 0.0015907739398244303, "epoch": 0.09267976830057925, "grad_norm": 0.1701882779598236, "learning_rate": 2e-07, "loss": 0.0476, "step": 993 }, { "clip_ratio/high_max": 0.001924390446220059, "clip_ratio/high_mean": 0.0007263246188813355, "clip_ratio/low_mean": 0.0007194406643975526, "clip_ratio/low_min": 6.150864010123769e-05, "clip_ratio/region_mean": 0.0014457652687269729, "epoch": 0.09277310140057983, "grad_norm": 0.17477288842201233, "learning_rate": 2e-07, "loss": 0.0256, "step": 994 }, { "clip_ratio/high_max": 0.0016744578788348008, "clip_ratio/high_mean": 0.0007657353689864976, "clip_ratio/low_mean": 0.0006010684746797779, "clip_ratio/low_min": 3.2638732591294684e-05, "clip_ratio/region_mean": 0.0013668038627656642, "epoch": 0.09286643450058042, "grad_norm": 0.15913796424865723, "learning_rate": 2e-07, "loss": -0.0048, "step": 995 }, { "clip_ratio/high_max": 0.0017527682794025168, "clip_ratio/high_mean": 0.0007131708371161949, "clip_ratio/low_mean": 0.0008000614998309175, "clip_ratio/low_min": 5.500524912349647e-05, "clip_ratio/region_mean": 0.0015132323278521653, "epoch": 0.092959767600581, "grad_norm": 0.17258119583129883, "learning_rate": 2e-07, "loss": 0.0848, "step": 996 }, { "clip_ratio/high_max": 0.0018024387463810854, "clip_ratio/high_mean": 0.0007004461422184249, "clip_ratio/low_mean": 0.0006805818802604335, "clip_ratio/low_min": 2.8755463063134812e-05, "clip_ratio/region_mean": 0.0013810280506731942, "epoch": 0.09305310070058158, "grad_norm": 0.1578313410282135, "learning_rate": 2e-07, "loss": 0.0205, "step": 997 }, { "clip_ratio/high_max": 0.001954502578882966, "clip_ratio/high_mean": 0.0006651413123108796, "clip_ratio/low_mean": 0.0007015267970018613, "clip_ratio/low_min": 8.887940566637553e-06, "clip_ratio/region_mean": 0.0013666680933965836, "epoch": 0.09314643380058217, "grad_norm": 0.1596735715866089, "learning_rate": 2e-07, "loss": 0.0523, "step": 998 }, { "clip_ratio/high_max": 0.0016801849633338861, "clip_ratio/high_mean": 0.0007377110505331075, "clip_ratio/low_mean": 0.000856043277963181, "clip_ratio/low_min": 0.00011845880544569809, "clip_ratio/region_mean": 0.0015937543375912355, "epoch": 0.09323976690058275, "grad_norm": 0.15979592502117157, "learning_rate": 2e-07, "loss": 0.0379, "step": 999 }, { "clip_ratio/high_max": 0.0020866238337475806, "clip_ratio/high_mean": 0.0007494851415685844, "clip_ratio/low_mean": 0.0007071653726598015, "clip_ratio/low_min": 6.123017919890117e-05, "clip_ratio/region_mean": 0.00145665050149546, "epoch": 0.09333310000058333, "grad_norm": 0.16106240451335907, "learning_rate": 2e-07, "loss": 0.0436, "step": 1000 }, { "clip_ratio/high_max": 0.0016557607741560787, "clip_ratio/high_mean": 0.0006794122955398052, "clip_ratio/low_mean": 0.0008124851428874535, "clip_ratio/low_min": 0.00010920055774477078, "clip_ratio/region_mean": 0.0014918974229658488, "epoch": 0.09342643310058392, "grad_norm": 0.18092547357082367, "learning_rate": 2e-07, "loss": 0.0612, "step": 1001 }, { "clip_ratio/high_max": 0.0020951323549525114, "clip_ratio/high_mean": 0.0007371590199909406, "clip_ratio/low_mean": 0.0007138944347389042, "clip_ratio/low_min": 5.461011733132182e-05, "clip_ratio/region_mean": 0.0014510534674627706, "epoch": 0.0935197662005845, "grad_norm": 0.16525115072727203, "learning_rate": 2e-07, "loss": 0.0251, "step": 1002 }, { "clip_ratio/high_max": 0.0019485964076011442, "clip_ratio/high_mean": 0.0007863827977416804, "clip_ratio/low_mean": 0.0007287699718290241, "clip_ratio/low_min": 3.737904626177624e-05, "clip_ratio/region_mean": 0.0015151527732086834, "epoch": 0.09361309930058508, "grad_norm": 0.16803322732448578, "learning_rate": 2e-07, "loss": -0.0095, "step": 1003 }, { "clip_ratio/high_max": 0.0019915703451260924, "clip_ratio/high_mean": 0.0008003491057024803, "clip_ratio/low_mean": 0.0007947855947350035, "clip_ratio/low_min": 7.038913645374123e-05, "clip_ratio/region_mean": 0.0015951346904330421, "epoch": 0.09370643240058567, "grad_norm": 3.764392852783203, "learning_rate": 2e-07, "loss": 0.0487, "step": 1004 }, { "clip_ratio/high_max": 0.0020382414404593874, "clip_ratio/high_mean": 0.0007909505857242038, "clip_ratio/low_mean": 0.0007304107766685775, "clip_ratio/low_min": 2.398311517026741e-05, "clip_ratio/region_mean": 0.0015213614024105482, "epoch": 0.09379976550058625, "grad_norm": 0.16409805417060852, "learning_rate": 2e-07, "loss": 0.0088, "step": 1005 }, { "clip_ratio/high_max": 0.0018765366257866845, "clip_ratio/high_mean": 0.000726656991901109, "clip_ratio/low_mean": 0.0006164400983834639, "clip_ratio/low_min": 1.1461580470495392e-05, "clip_ratio/region_mean": 0.0013430970757326577, "epoch": 0.09389309860058684, "grad_norm": 0.15302081406116486, "learning_rate": 2e-07, "loss": 0.0313, "step": 1006 }, { "clip_ratio/high_max": 0.0017205978510901332, "clip_ratio/high_mean": 0.0007287298803930753, "clip_ratio/low_mean": 0.0007563193721580319, "clip_ratio/low_min": 4.80368830722e-05, "clip_ratio/region_mean": 0.0014850492698315065, "epoch": 0.09398643170058742, "grad_norm": 0.17403312027454376, "learning_rate": 2e-07, "loss": 0.0297, "step": 1007 }, { "clip_ratio/high_max": 0.0020732058510475326, "clip_ratio/high_mean": 0.0008300708550450508, "clip_ratio/low_mean": 0.0007659287148271687, "clip_ratio/low_min": 1.0414931239211e-05, "clip_ratio/region_mean": 0.001595999587152619, "epoch": 0.094079764800588, "grad_norm": 0.16306549310684204, "learning_rate": 2e-07, "loss": 0.0102, "step": 1008 }, { "clip_ratio/high_max": 0.0018988859956152737, "clip_ratio/high_mean": 0.0007440997032972518, "clip_ratio/low_mean": 0.0007344037239818135, "clip_ratio/low_min": 5.233309275354259e-05, "clip_ratio/region_mean": 0.0014785034290980548, "epoch": 0.09417309790058859, "grad_norm": 0.14677399396896362, "learning_rate": 2e-07, "loss": 0.0521, "step": 1009 }, { "clip_ratio/high_max": 0.0020322283598943613, "clip_ratio/high_mean": 0.0008096237033896614, "clip_ratio/low_mean": 0.0008060116597334854, "clip_ratio/low_min": 4.864781476499047e-05, "clip_ratio/region_mean": 0.0016156353667611256, "epoch": 0.09426643100058917, "grad_norm": 0.14751096069812775, "learning_rate": 2e-07, "loss": 0.0084, "step": 1010 }, { "clip_ratio/high_max": 0.0020679688423115294, "clip_ratio/high_mean": 0.0007567625016235979, "clip_ratio/low_mean": 0.0007551411108579487, "clip_ratio/low_min": 5.103127841721289e-05, "clip_ratio/region_mean": 0.0015119036106625572, "epoch": 0.09435976410058974, "grad_norm": 0.16788513958454132, "learning_rate": 2e-07, "loss": 0.078, "step": 1011 }, { "clip_ratio/high_max": 0.001995256494410569, "clip_ratio/high_mean": 0.0007811438445060048, "clip_ratio/low_mean": 0.0006943780554138357, "clip_ratio/low_min": 4.351900042820489e-05, "clip_ratio/region_mean": 0.0014755218944628723, "epoch": 0.09445309720059034, "grad_norm": 0.15616944432258606, "learning_rate": 2e-07, "loss": 0.012, "step": 1012 }, { "clip_ratio/high_max": 0.0021542187605518848, "clip_ratio/high_mean": 0.0008820974653644953, "clip_ratio/low_mean": 0.0007092805553838843, "clip_ratio/low_min": 1.4964683032303583e-05, "clip_ratio/region_mean": 0.0015913780589471571, "epoch": 0.09454643030059091, "grad_norm": 0.1882270723581314, "learning_rate": 2e-07, "loss": -0.0067, "step": 1013 }, { "clip_ratio/high_max": 0.0021625988119922113, "clip_ratio/high_mean": 0.0008375363977393135, "clip_ratio/low_mean": 0.0008140048921632115, "clip_ratio/low_min": 4.327881197241368e-05, "clip_ratio/region_mean": 0.0016515413153683767, "epoch": 0.09463976340059149, "grad_norm": 0.19054551422595978, "learning_rate": 2e-07, "loss": 0.0203, "step": 1014 }, { "clip_ratio/high_max": 0.0020210101502016187, "clip_ratio/high_mean": 0.0008207140981539851, "clip_ratio/low_mean": 0.0007094963348208694, "clip_ratio/low_min": 9.103827869694214e-05, "clip_ratio/region_mean": 0.0015302104657166637, "epoch": 0.09473309650059208, "grad_norm": 0.16385230422019958, "learning_rate": 2e-07, "loss": -0.0076, "step": 1015 }, { "clip_ratio/high_max": 0.0017702915502013639, "clip_ratio/high_mean": 0.000703186757164076, "clip_ratio/low_mean": 0.0007607234929309925, "clip_ratio/low_min": 4.91570235681138e-05, "clip_ratio/region_mean": 0.001463910230086185, "epoch": 0.09482642960059266, "grad_norm": 0.1446237415075302, "learning_rate": 2e-07, "loss": 0.0382, "step": 1016 }, { "clip_ratio/high_max": 0.0020887565697194077, "clip_ratio/high_mean": 0.0007949076889417483, "clip_ratio/low_mean": 0.0008456218329229159, "clip_ratio/low_min": 9.0389453816897e-05, "clip_ratio/region_mean": 0.0016405294918513391, "epoch": 0.09491976270059325, "grad_norm": 0.19028033316135406, "learning_rate": 2e-07, "loss": 0.0384, "step": 1017 }, { "clip_ratio/high_max": 0.0022450946125900373, "clip_ratio/high_mean": 0.000900813869520789, "clip_ratio/low_mean": 0.0007954278171382612, "clip_ratio/low_min": 3.768257647607243e-05, "clip_ratio/region_mean": 0.0016962417139438912, "epoch": 0.09501309580059383, "grad_norm": 0.16988860070705414, "learning_rate": 2e-07, "loss": -0.0293, "step": 1018 }, { "clip_ratio/high_max": 0.002386932697845623, "clip_ratio/high_mean": 0.0009201758530252846, "clip_ratio/low_mean": 0.0007499026087316452, "clip_ratio/low_min": 1.173488544736756e-05, "clip_ratio/region_mean": 0.0016700784326530993, "epoch": 0.09510642890059441, "grad_norm": 0.16819550096988678, "learning_rate": 2e-07, "loss": 0.0309, "step": 1019 }, { "clip_ratio/high_max": 0.0018833024332707282, "clip_ratio/high_mean": 0.0007875569090174395, "clip_ratio/low_mean": 0.0006861471774755046, "clip_ratio/low_min": 5.8686335250968114e-05, "clip_ratio/region_mean": 0.0014737041092303116, "epoch": 0.095199762000595, "grad_norm": 0.17758208513259888, "learning_rate": 2e-07, "loss": -0.0057, "step": 1020 }, { "clip_ratio/high_max": 0.002236204112705309, "clip_ratio/high_mean": 0.0009477347593929153, "clip_ratio/low_mean": 0.0007580286746815545, "clip_ratio/low_min": 3.737293081940152e-05, "clip_ratio/region_mean": 0.0017057634468073957, "epoch": 0.09529309510059558, "grad_norm": 0.19571329653263092, "learning_rate": 2e-07, "loss": -0.005, "step": 1021 }, { "clip_ratio/high_max": 0.0019461091578705236, "clip_ratio/high_mean": 0.0007506560923502548, "clip_ratio/low_mean": 0.0007778410472383257, "clip_ratio/low_min": 3.863439451379236e-05, "clip_ratio/region_mean": 0.0015284971450455487, "epoch": 0.09538642820059616, "grad_norm": 0.1953987181186676, "learning_rate": 2e-07, "loss": 0.0273, "step": 1022 }, { "clip_ratio/high_max": 0.0021502482049982063, "clip_ratio/high_mean": 0.0008394458454858977, "clip_ratio/low_mean": 0.0006919594370629056, "clip_ratio/low_min": 2.0791749193449505e-05, "clip_ratio/region_mean": 0.0015314052361645736, "epoch": 0.09547976130059675, "grad_norm": 0.16121304035186768, "learning_rate": 2e-07, "loss": 0.0185, "step": 1023 }, { "clip_ratio/high_max": 0.002215524022176396, "clip_ratio/high_mean": 0.0008673295287735527, "clip_ratio/low_mean": 0.0008442235412076116, "clip_ratio/low_min": 3.6828641896136105e-05, "clip_ratio/region_mean": 0.0017115530718001537, "epoch": 0.09557309440059733, "grad_norm": 0.18938623368740082, "learning_rate": 2e-07, "loss": 0.0186, "step": 1024 }, { "clip_ratio/high_max": 0.0014378484320332063, "clip_ratio/high_mean": 0.0005517959430108021, "clip_ratio/low_mean": 0.0005131368152433424, "clip_ratio/low_min": 2.5095361706917174e-05, "clip_ratio/region_mean": 0.0010649327632563654, "completions/clipped_ratio": 0.016357421875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 688.0979614257812, "completions/mean_terminated_length": 631.4264526367188, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.09566642750059791, "grad_norm": 0.14708003401756287, "learning_rate": 2e-07, "loss": 0.0151, "num_tokens": 766251397.0, "reward": 0.5881609320640564, "reward_std": 0.18022732436656952, "rewards/simpleverify_reward/mean": 0.5881609320640564, "rewards/simpleverify_reward/std": 0.49216827750205994, "step": 1025 }, { "clip_ratio/high_max": 0.0016916839849727694, "clip_ratio/high_mean": 0.0006159574913908727, "clip_ratio/low_mean": 0.0005496037592820358, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011655612179310992, "epoch": 0.0957597606005985, "grad_norm": 0.13866794109344482, "learning_rate": 2e-07, "loss": 0.0299, "step": 1026 }, { "clip_ratio/high_max": 0.001847016901592724, "clip_ratio/high_mean": 0.0006025774873705814, "clip_ratio/low_mean": 0.0005515253305929946, "clip_ratio/low_min": 2.3036706807033625e-05, "clip_ratio/region_mean": 0.0011541028252395336, "epoch": 0.09585309370059908, "grad_norm": 0.16881100833415985, "learning_rate": 2e-07, "loss": 0.026, "step": 1027 }, { "clip_ratio/high_max": 0.0020663442264776677, "clip_ratio/high_mean": 0.0007692217004660051, "clip_ratio/low_mean": 0.000668367174512241, "clip_ratio/low_min": 9.465446510148467e-05, "clip_ratio/region_mean": 0.001437588878616225, "epoch": 0.09594642680059967, "grad_norm": 0.17618006467819214, "learning_rate": 2e-07, "loss": 0.0337, "step": 1028 }, { "clip_ratio/high_max": 0.0014562933647539467, "clip_ratio/high_mean": 0.0006174530481075635, "clip_ratio/low_mean": 0.000542192741704639, "clip_ratio/low_min": 3.596656915760832e-05, "clip_ratio/region_mean": 0.0011596457989071496, "epoch": 0.09603975990060025, "grad_norm": 0.1466275453567505, "learning_rate": 2e-07, "loss": 0.0063, "step": 1029 }, { "clip_ratio/high_max": 0.0014505838953482453, "clip_ratio/high_mean": 0.0005131844172865385, "clip_ratio/low_mean": 0.000622073781414656, "clip_ratio/low_min": 0.00011859360529342666, "clip_ratio/region_mean": 0.0011352581896062475, "epoch": 0.09613309300060083, "grad_norm": 0.16999980807304382, "learning_rate": 2e-07, "loss": 0.0387, "step": 1030 }, { "clip_ratio/high_max": 0.0016901072849577758, "clip_ratio/high_mean": 0.0005895062386116479, "clip_ratio/low_mean": 0.0005215560777287465, "clip_ratio/low_min": 3.083405044890242e-05, "clip_ratio/region_mean": 0.0011110623308923095, "epoch": 0.09622642610060142, "grad_norm": 0.14095626771450043, "learning_rate": 2e-07, "loss": 0.0006, "step": 1031 }, { "clip_ratio/high_max": 0.0016323715353792068, "clip_ratio/high_mean": 0.0005746517363149906, "clip_ratio/low_mean": 0.0006463962690759217, "clip_ratio/low_min": 3.839696910290513e-05, "clip_ratio/region_mean": 0.0012210479653731454, "epoch": 0.096319759200602, "grad_norm": 0.15852895379066467, "learning_rate": 2e-07, "loss": 0.0734, "step": 1032 }, { "clip_ratio/high_max": 0.0015259204265021253, "clip_ratio/high_mean": 0.0005827600234624697, "clip_ratio/low_mean": 0.0006413541332221939, "clip_ratio/low_min": 5.0354283303022385e-05, "clip_ratio/region_mean": 0.0012241141666891053, "epoch": 0.09641309230060258, "grad_norm": 0.15097637474536896, "learning_rate": 2e-07, "loss": 0.0675, "step": 1033 }, { "clip_ratio/high_max": 0.0020330803672550246, "clip_ratio/high_mean": 0.0007633451423316728, "clip_ratio/low_mean": 0.000565596298656601, "clip_ratio/low_min": 5.0420063416822813e-05, "clip_ratio/region_mean": 0.0013289414491737261, "epoch": 0.09650642540060317, "grad_norm": 0.1422506868839264, "learning_rate": 2e-07, "loss": 0.0398, "step": 1034 }, { "clip_ratio/high_max": 0.0015215708226605784, "clip_ratio/high_mean": 0.0006347874132188736, "clip_ratio/low_mean": 0.0005298105015754118, "clip_ratio/low_min": 2.170218613173347e-05, "clip_ratio/region_mean": 0.0011645979102468118, "epoch": 0.09659975850060375, "grad_norm": 0.18003129959106445, "learning_rate": 2e-07, "loss": 0.0208, "step": 1035 }, { "clip_ratio/high_max": 0.0017026555506163277, "clip_ratio/high_mean": 0.0006326746042759623, "clip_ratio/low_mean": 0.0005475996194945765, "clip_ratio/low_min": 4.200718103675172e-05, "clip_ratio/region_mean": 0.0011802742155850865, "epoch": 0.09669309160060433, "grad_norm": 0.15974678099155426, "learning_rate": 2e-07, "loss": 0.0267, "step": 1036 }, { "clip_ratio/high_max": 0.0016561401025683153, "clip_ratio/high_mean": 0.00062518098820874, "clip_ratio/low_mean": 0.0005852482408954529, "clip_ratio/low_min": 1.565239108458627e-05, "clip_ratio/region_mean": 0.0012104292254662141, "epoch": 0.09678642470060492, "grad_norm": 0.14771315455436707, "learning_rate": 2e-07, "loss": 0.0343, "step": 1037 }, { "clip_ratio/high_max": 0.0015022968254925217, "clip_ratio/high_mean": 0.0005291304896672955, "clip_ratio/low_mean": 0.0005519391706911847, "clip_ratio/low_min": 3.993748669017805e-05, "clip_ratio/region_mean": 0.0010810696403495967, "epoch": 0.0968797578006055, "grad_norm": 0.1412869542837143, "learning_rate": 2e-07, "loss": 0.0392, "step": 1038 }, { "clip_ratio/high_max": 0.0014179872414388228, "clip_ratio/high_mean": 0.0005276464544294868, "clip_ratio/low_mean": 0.0006172272169351345, "clip_ratio/low_min": 1.4540123629558366e-05, "clip_ratio/region_mean": 0.001144873684097547, "epoch": 0.09697309090060609, "grad_norm": 0.1414092481136322, "learning_rate": 2e-07, "loss": 0.018, "step": 1039 }, { "clip_ratio/high_max": 0.0020093211278435774, "clip_ratio/high_mean": 0.0007387639780063182, "clip_ratio/low_mean": 0.0006546789954882115, "clip_ratio/low_min": 3.0385104764718562e-05, "clip_ratio/region_mean": 0.0013934429716755403, "epoch": 0.09706642400060667, "grad_norm": 3.05383038520813, "learning_rate": 2e-07, "loss": 0.0136, "step": 1040 }, { "clip_ratio/high_max": 0.0016524317688890733, "clip_ratio/high_mean": 0.0006180343461892335, "clip_ratio/low_mean": 0.0005607486473309109, "clip_ratio/low_min": 3.5719126572075766e-05, "clip_ratio/region_mean": 0.0011787830226239748, "epoch": 0.09715975710060724, "grad_norm": 0.1433306336402893, "learning_rate": 2e-07, "loss": 0.0271, "step": 1041 }, { "clip_ratio/high_max": 0.001474804103054339, "clip_ratio/high_mean": 0.0005959991613053717, "clip_ratio/low_mean": 0.0005221336359682027, "clip_ratio/low_min": 5.6562392273917794e-05, "clip_ratio/region_mean": 0.0011181327827216592, "epoch": 0.09725309020060784, "grad_norm": 0.15571217238903046, "learning_rate": 2e-07, "loss": 0.0555, "step": 1042 }, { "clip_ratio/high_max": 0.0019341195475135464, "clip_ratio/high_mean": 0.0007099760241544573, "clip_ratio/low_mean": 0.0006023020487191388, "clip_ratio/low_min": 3.740999909496168e-05, "clip_ratio/region_mean": 0.0013122780510457233, "epoch": 0.09734642330060841, "grad_norm": 0.15605802834033966, "learning_rate": 2e-07, "loss": 0.0397, "step": 1043 }, { "clip_ratio/high_max": 0.0014168736597639509, "clip_ratio/high_mean": 0.0006161271030578064, "clip_ratio/low_mean": 0.0005508987687790068, "clip_ratio/low_min": 2.1357391233323142e-05, "clip_ratio/region_mean": 0.0011670258463709615, "epoch": 0.09743975640060899, "grad_norm": 0.1509365737438202, "learning_rate": 2e-07, "loss": 0.0231, "step": 1044 }, { "clip_ratio/high_max": 0.0014546292750310386, "clip_ratio/high_mean": 0.000653802472697862, "clip_ratio/low_mean": 0.000603642183705233, "clip_ratio/low_min": 2.830098856065888e-05, "clip_ratio/region_mean": 0.001257444659131579, "epoch": 0.09753308950060958, "grad_norm": 0.1463029384613037, "learning_rate": 2e-07, "loss": 0.0454, "step": 1045 }, { "clip_ratio/high_max": 0.0015412387001561, "clip_ratio/high_mean": 0.0006065310626581777, "clip_ratio/low_mean": 0.000608464297329192, "clip_ratio/low_min": 3.5031098377658054e-05, "clip_ratio/region_mean": 0.001214995347254444, "epoch": 0.09762642260061016, "grad_norm": 0.14445355534553528, "learning_rate": 2e-07, "loss": 0.0199, "step": 1046 }, { "clip_ratio/high_max": 0.00156498274373007, "clip_ratio/high_mean": 0.0006398326586349867, "clip_ratio/low_mean": 0.0006145226343505783, "clip_ratio/low_min": 2.3263812181539834e-05, "clip_ratio/region_mean": 0.0012543552911665756, "epoch": 0.09771975570061076, "grad_norm": 0.1488150656223297, "learning_rate": 2e-07, "loss": 0.0458, "step": 1047 }, { "clip_ratio/high_max": 0.001422135090251686, "clip_ratio/high_mean": 0.0005962453124084277, "clip_ratio/low_mean": 0.0006148298289190279, "clip_ratio/low_min": 3.244934441681835e-05, "clip_ratio/region_mean": 0.0012110751304135192, "epoch": 0.09781308880061133, "grad_norm": 0.13644738495349884, "learning_rate": 2e-07, "loss": -0.0012, "step": 1048 }, { "clip_ratio/high_max": 0.0013715420027438086, "clip_ratio/high_mean": 0.0005221882256591925, "clip_ratio/low_mean": 0.0005905131720282952, "clip_ratio/low_min": 3.7213456380413845e-05, "clip_ratio/region_mean": 0.001112701396777993, "epoch": 0.09790642190061191, "grad_norm": 0.15375658869743347, "learning_rate": 2e-07, "loss": 0.0334, "step": 1049 }, { "clip_ratio/high_max": 0.00188410504051717, "clip_ratio/high_mean": 0.0006943056087038713, "clip_ratio/low_mean": 0.0006372497073243721, "clip_ratio/low_min": 6.70334502501646e-05, "clip_ratio/region_mean": 0.0013315552969288547, "epoch": 0.0979997550006125, "grad_norm": 0.16149811446666718, "learning_rate": 2e-07, "loss": 0.0166, "step": 1050 }, { "clip_ratio/high_max": 0.0018606667290441692, "clip_ratio/high_mean": 0.0006596406346943695, "clip_ratio/low_mean": 0.0005451956212709774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012048362514178734, "epoch": 0.09809308810061308, "grad_norm": 0.1412322074174881, "learning_rate": 2e-07, "loss": 0.0422, "step": 1051 }, { "clip_ratio/high_max": 0.0019422017503529787, "clip_ratio/high_mean": 0.0007313859550777124, "clip_ratio/low_mean": 0.0006135300377536623, "clip_ratio/low_min": 6.0183759160281625e-05, "clip_ratio/region_mean": 0.0013449159487208817, "epoch": 0.09818642120061366, "grad_norm": 0.13570831716060638, "learning_rate": 2e-07, "loss": 0.0228, "step": 1052 }, { "clip_ratio/high_max": 0.0017667753782006912, "clip_ratio/high_mean": 0.0007219247872853884, "clip_ratio/low_mean": 0.000667163996695308, "clip_ratio/low_min": 4.954661289957585e-05, "clip_ratio/region_mean": 0.0013890887676097918, "epoch": 0.09827975430061425, "grad_norm": 0.1552492082118988, "learning_rate": 2e-07, "loss": 0.0316, "step": 1053 }, { "clip_ratio/high_max": 0.0016986169466690626, "clip_ratio/high_mean": 0.0007095016753737582, "clip_ratio/low_mean": 0.0005949293345111073, "clip_ratio/low_min": 4.4419772166293114e-05, "clip_ratio/region_mean": 0.0013044310398981906, "epoch": 0.09837308740061483, "grad_norm": 0.15365970134735107, "learning_rate": 2e-07, "loss": 0.0154, "step": 1054 }, { "clip_ratio/high_max": 0.0015401641285279766, "clip_ratio/high_mean": 0.0005732785207328561, "clip_ratio/low_mean": 0.0005696142725355458, "clip_ratio/low_min": 9.837872312346008e-06, "clip_ratio/region_mean": 0.0011428928119130433, "epoch": 0.09846642050061541, "grad_norm": 0.1418396532535553, "learning_rate": 2e-07, "loss": 0.0677, "step": 1055 }, { "clip_ratio/high_max": 0.0017675054623396136, "clip_ratio/high_mean": 0.0007238244215841405, "clip_ratio/low_mean": 0.000682338139085914, "clip_ratio/low_min": 1.3356760973692872e-05, "clip_ratio/region_mean": 0.001406162580678938, "epoch": 0.098559753600616, "grad_norm": 0.1694512516260147, "learning_rate": 2e-07, "loss": 0.0506, "step": 1056 }, { "clip_ratio/high_max": 0.001602050873771077, "clip_ratio/high_mean": 0.0006504069660877576, "clip_ratio/low_mean": 0.0007078447015373968, "clip_ratio/low_min": 3.176004975102842e-05, "clip_ratio/region_mean": 0.0013582516767201014, "epoch": 0.09865308670061658, "grad_norm": 0.1421012580394745, "learning_rate": 2e-07, "loss": 0.0602, "step": 1057 }, { "clip_ratio/high_max": 0.0018375961171841482, "clip_ratio/high_mean": 0.0006802187417633832, "clip_ratio/low_mean": 0.0006417808872356545, "clip_ratio/low_min": 6.562881571881007e-05, "clip_ratio/region_mean": 0.00132199962172308, "epoch": 0.09874641980061717, "grad_norm": 0.15694449841976166, "learning_rate": 2e-07, "loss": 0.0567, "step": 1058 }, { "clip_ratio/high_max": 0.0018998623345396481, "clip_ratio/high_mean": 0.0007419255198328756, "clip_ratio/low_mean": 0.0005810023740195902, "clip_ratio/low_min": 3.517110508255428e-05, "clip_ratio/region_mean": 0.0013229279247752856, "epoch": 0.09883975290061775, "grad_norm": 0.15664304792881012, "learning_rate": 2e-07, "loss": -0.0069, "step": 1059 }, { "clip_ratio/high_max": 0.0015013605880085379, "clip_ratio/high_mean": 0.0005992483238514978, "clip_ratio/low_mean": 0.0006321663640846964, "clip_ratio/low_min": 4.217208243062487e-05, "clip_ratio/region_mean": 0.001231414709764067, "epoch": 0.09893308600061833, "grad_norm": 0.1460362821817398, "learning_rate": 2e-07, "loss": 0.0253, "step": 1060 }, { "clip_ratio/high_max": 0.0017318265308858827, "clip_ratio/high_mean": 0.0006700886997350608, "clip_ratio/low_mean": 0.0005704220639017876, "clip_ratio/low_min": 1.4589169040846173e-05, "clip_ratio/region_mean": 0.0012405107445374597, "epoch": 0.09902641910061892, "grad_norm": 0.14607422053813934, "learning_rate": 2e-07, "loss": -0.0176, "step": 1061 }, { "clip_ratio/high_max": 0.0013816305981890764, "clip_ratio/high_mean": 0.0005875156748516019, "clip_ratio/low_mean": 0.0006704420693495194, "clip_ratio/low_min": 1.3275275705382228e-05, "clip_ratio/region_mean": 0.0012579577305587009, "epoch": 0.0991197522006195, "grad_norm": 0.15838906168937683, "learning_rate": 2e-07, "loss": 0.0446, "step": 1062 }, { "clip_ratio/high_max": 0.0018588349339552224, "clip_ratio/high_mean": 0.0006482922017312376, "clip_ratio/low_mean": 0.0005395031039370224, "clip_ratio/low_min": 3.416464733163593e-05, "clip_ratio/region_mean": 0.001187795296573313, "epoch": 0.09921308530062008, "grad_norm": 0.14904262125492096, "learning_rate": 2e-07, "loss": 0.0281, "step": 1063 }, { "clip_ratio/high_max": 0.0014360553868755233, "clip_ratio/high_mean": 0.0005475255038618343, "clip_ratio/low_mean": 0.0005829582687510992, "clip_ratio/low_min": 4.5051278902974445e-05, "clip_ratio/region_mean": 0.0011304837571515236, "epoch": 0.09930641840062067, "grad_norm": 0.14753474295139313, "learning_rate": 2e-07, "loss": 0.0431, "step": 1064 }, { "clip_ratio/high_max": 0.0017909514353959821, "clip_ratio/high_mean": 0.0007480057265638607, "clip_ratio/low_mean": 0.0005338946066331118, "clip_ratio/low_min": 7.827175977581646e-06, "clip_ratio/region_mean": 0.001281900335015962, "epoch": 0.09939975150062125, "grad_norm": 0.14933542907238007, "learning_rate": 2e-07, "loss": 0.0148, "step": 1065 }, { "clip_ratio/high_max": 0.0015598027457599528, "clip_ratio/high_mean": 0.0005854885503140395, "clip_ratio/low_mean": 0.0006075235323805828, "clip_ratio/low_min": 4.125160376133863e-05, "clip_ratio/region_mean": 0.0011930120635952335, "epoch": 0.09949308460062183, "grad_norm": 0.15162643790245056, "learning_rate": 2e-07, "loss": 0.0148, "step": 1066 }, { "clip_ratio/high_max": 0.0018379451939836144, "clip_ratio/high_mean": 0.0007593542541144416, "clip_ratio/low_mean": 0.00048340085140807787, "clip_ratio/low_min": 2.776235487544909e-05, "clip_ratio/region_mean": 0.001242755082785152, "epoch": 0.09958641770062242, "grad_norm": 0.15261171758174896, "learning_rate": 2e-07, "loss": -0.0019, "step": 1067 }, { "clip_ratio/high_max": 0.0018035454377240967, "clip_ratio/high_mean": 0.0007400907215924235, "clip_ratio/low_mean": 0.0006519323096654261, "clip_ratio/low_min": 6.042232962499838e-05, "clip_ratio/region_mean": 0.0013920230449002702, "epoch": 0.099679750800623, "grad_norm": 0.15740974247455597, "learning_rate": 2e-07, "loss": 0.0137, "step": 1068 }, { "clip_ratio/high_max": 0.0015695099282311276, "clip_ratio/high_mean": 0.0006296430733527814, "clip_ratio/low_mean": 0.0005989285436953651, "clip_ratio/low_min": 1.1606313819356728e-05, "clip_ratio/region_mean": 0.0012285716184123885, "epoch": 0.09977308390062359, "grad_norm": 0.14155597984790802, "learning_rate": 2e-07, "loss": 0.019, "step": 1069 }, { "clip_ratio/high_max": 0.0012968215960427187, "clip_ratio/high_mean": 0.0005502943440660601, "clip_ratio/low_mean": 0.0005429208395071328, "clip_ratio/low_min": 1.1134865417261608e-05, "clip_ratio/region_mean": 0.001093215174478246, "epoch": 0.09986641700062417, "grad_norm": 0.2143881767988205, "learning_rate": 2e-07, "loss": 0.1044, "step": 1070 }, { "clip_ratio/high_max": 0.0017457262438256294, "clip_ratio/high_mean": 0.0007524430911871605, "clip_ratio/low_mean": 0.0005688552232641086, "clip_ratio/low_min": 6.22768475295743e-05, "clip_ratio/region_mean": 0.001321298292168649, "epoch": 0.09995975010062474, "grad_norm": 0.15124933421611786, "learning_rate": 2e-07, "loss": -0.0014, "step": 1071 }, { "clip_ratio/high_max": 0.0016318767447955906, "clip_ratio/high_mean": 0.0006549804020323791, "clip_ratio/low_mean": 0.0006409282141248696, "clip_ratio/low_min": 2.285254595335573e-05, "clip_ratio/region_mean": 0.0012959085797774605, "epoch": 0.10005308320062534, "grad_norm": 0.16364982724189758, "learning_rate": 2e-07, "loss": 0.0451, "step": 1072 }, { "clip_ratio/high_max": 0.0016330828293575905, "clip_ratio/high_mean": 0.0006716552852594759, "clip_ratio/low_mean": 0.0005879046075278893, "clip_ratio/low_min": 1.1852835086756386e-05, "clip_ratio/region_mean": 0.0012595598818734288, "epoch": 0.10014641630062592, "grad_norm": 0.15425899624824524, "learning_rate": 2e-07, "loss": -0.0054, "step": 1073 }, { "clip_ratio/high_max": 0.0015259459360095207, "clip_ratio/high_mean": 0.0005894996120332507, "clip_ratio/low_mean": 0.0005918595070397714, "clip_ratio/low_min": 3.4677268558880314e-05, "clip_ratio/region_mean": 0.0011813591518148314, "epoch": 0.1002397494006265, "grad_norm": 0.17031630873680115, "learning_rate": 2e-07, "loss": 0.0419, "step": 1074 }, { "clip_ratio/high_max": 0.0016506117572134826, "clip_ratio/high_mean": 0.000685876505485794, "clip_ratio/low_mean": 0.0005948870493739378, "clip_ratio/low_min": 9.391915136802709e-05, "clip_ratio/region_mean": 0.0012807635539502371, "epoch": 0.10033308250062709, "grad_norm": 0.15664374828338623, "learning_rate": 2e-07, "loss": -0.0001, "step": 1075 }, { "clip_ratio/high_max": 0.0019362687889952213, "clip_ratio/high_mean": 0.0007669846654607682, "clip_ratio/low_mean": 0.0006705073956254637, "clip_ratio/low_min": 4.813044870388694e-05, "clip_ratio/region_mean": 0.0014374920720001683, "epoch": 0.10042641560062766, "grad_norm": 0.16379493474960327, "learning_rate": 2e-07, "loss": -0.0203, "step": 1076 }, { "clip_ratio/high_max": 0.0015932394926494453, "clip_ratio/high_mean": 0.0006472947925431072, "clip_ratio/low_mean": 0.0006747439892933471, "clip_ratio/low_min": 5.611099550151266e-05, "clip_ratio/region_mean": 0.0013220387845649384, "epoch": 0.10051974870062824, "grad_norm": 0.1649751365184784, "learning_rate": 2e-07, "loss": 0.0826, "step": 1077 }, { "clip_ratio/high_max": 0.0016202120714297052, "clip_ratio/high_mean": 0.0007010794843154144, "clip_ratio/low_mean": 0.000568507890420733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012695873774646316, "epoch": 0.10061308180062883, "grad_norm": 0.1702614277601242, "learning_rate": 2e-07, "loss": 0.0217, "step": 1078 }, { "clip_ratio/high_max": 0.001876426969829481, "clip_ratio/high_mean": 0.0006912707776791649, "clip_ratio/low_mean": 0.0008076408494162024, "clip_ratio/low_min": 8.027134299481986e-05, "clip_ratio/region_mean": 0.0014989116534707136, "epoch": 0.10070641490062941, "grad_norm": 0.21598941087722778, "learning_rate": 2e-07, "loss": 0.0417, "step": 1079 }, { "clip_ratio/high_max": 0.0017965964398172218, "clip_ratio/high_mean": 0.000657634476738167, "clip_ratio/low_mean": 0.0006064373192202765, "clip_ratio/low_min": 2.0789444533875212e-05, "clip_ratio/region_mean": 0.0012640717868634965, "epoch": 0.10079974800063, "grad_norm": 0.14918962121009827, "learning_rate": 2e-07, "loss": 0.015, "step": 1080 }, { "clip_ratio/high_max": 0.0017111210108851083, "clip_ratio/high_mean": 0.0006135367730166763, "clip_ratio/low_mean": 0.0006410542355297366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012545910285552964, "epoch": 0.10089308110063058, "grad_norm": 0.1647917479276657, "learning_rate": 2e-07, "loss": 0.0282, "step": 1081 }, { "clip_ratio/high_max": 0.0015425382734974846, "clip_ratio/high_mean": 0.0006584089769603452, "clip_ratio/low_mean": 0.0005921895553910872, "clip_ratio/low_min": 4.2094071432075e-05, "clip_ratio/region_mean": 0.0012505985541793052, "epoch": 0.10098641420063116, "grad_norm": 0.15945541858673096, "learning_rate": 2e-07, "loss": 0.0432, "step": 1082 }, { "clip_ratio/high_max": 0.001761048817570554, "clip_ratio/high_mean": 0.0007012805508566089, "clip_ratio/low_mean": 0.0007137244738260051, "clip_ratio/low_min": 2.256865263916552e-05, "clip_ratio/region_mean": 0.001415005033777561, "epoch": 0.10107974730063175, "grad_norm": 0.14704836905002594, "learning_rate": 2e-07, "loss": 0.0533, "step": 1083 }, { "clip_ratio/high_max": 0.0015764176932862028, "clip_ratio/high_mean": 0.0006854046205262421, "clip_ratio/low_mean": 0.0007194344634626759, "clip_ratio/low_min": 8.922711094783153e-05, "clip_ratio/region_mean": 0.0014048390912648756, "epoch": 0.10117308040063233, "grad_norm": 0.17536237835884094, "learning_rate": 2e-07, "loss": 0.0222, "step": 1084 }, { "clip_ratio/high_max": 0.0017117588940891437, "clip_ratio/high_mean": 0.000690675136866048, "clip_ratio/low_mean": 0.0005924012857576599, "clip_ratio/low_min": 3.3768368666642345e-05, "clip_ratio/region_mean": 0.0012830764353566337, "epoch": 0.10126641350063291, "grad_norm": 0.17821721732616425, "learning_rate": 2e-07, "loss": 0.0327, "step": 1085 }, { "clip_ratio/high_max": 0.0018025294339167885, "clip_ratio/high_mean": 0.0006769746760255657, "clip_ratio/low_mean": 0.0005648236710840138, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012417983489285689, "epoch": 0.1013597466006335, "grad_norm": 0.15017281472682953, "learning_rate": 2e-07, "loss": -0.0209, "step": 1086 }, { "clip_ratio/high_max": 0.0018446145331836306, "clip_ratio/high_mean": 0.000690034501531045, "clip_ratio/low_mean": 0.0007388792801066302, "clip_ratio/low_min": 6.953795582376188e-05, "clip_ratio/region_mean": 0.001428913776180707, "epoch": 0.10145307970063408, "grad_norm": 0.1970624029636383, "learning_rate": 2e-07, "loss": 0.0478, "step": 1087 }, { "clip_ratio/high_max": 0.0018227329856017604, "clip_ratio/high_mean": 0.000725278579920996, "clip_ratio/low_mean": 0.0006709175900141418, "clip_ratio/low_min": 8.060100117290858e-05, "clip_ratio/region_mean": 0.0013961961813038215, "epoch": 0.10154641280063466, "grad_norm": 0.1950201541185379, "learning_rate": 2e-07, "loss": 0.0387, "step": 1088 }, { "clip_ratio/high_max": 0.0018493605966796167, "clip_ratio/high_mean": 0.0007223429856821895, "clip_ratio/low_mean": 0.0006925458728801459, "clip_ratio/low_min": 1.3185654097469524e-05, "clip_ratio/region_mean": 0.0014148888767522294, "epoch": 0.10163974590063525, "grad_norm": 0.15911564230918884, "learning_rate": 2e-07, "loss": 0.0596, "step": 1089 }, { "clip_ratio/high_max": 0.0018189161346526816, "clip_ratio/high_mean": 0.0007318314656004077, "clip_ratio/low_mean": 0.0005715087754651904, "clip_ratio/low_min": 1.1160714166180696e-05, "clip_ratio/region_mean": 0.0013033402501605451, "epoch": 0.10173307900063583, "grad_norm": 0.1550421267747879, "learning_rate": 2e-07, "loss": 0.0214, "step": 1090 }, { "clip_ratio/high_max": 0.0020656078595493454, "clip_ratio/high_mean": 0.0007536990287917433, "clip_ratio/low_mean": 0.0006875927410874283, "clip_ratio/low_min": 4.433202730069752e-05, "clip_ratio/region_mean": 0.0014412917735171504, "epoch": 0.10182641210063642, "grad_norm": 0.18259114027023315, "learning_rate": 2e-07, "loss": 0.0568, "step": 1091 }, { "clip_ratio/high_max": 0.0016649528333800845, "clip_ratio/high_mean": 0.000697522410973761, "clip_ratio/low_mean": 0.000618533170381852, "clip_ratio/low_min": 3.604501762310974e-05, "clip_ratio/region_mean": 0.0013160555790818762, "epoch": 0.101919745200637, "grad_norm": 0.15836119651794434, "learning_rate": 2e-07, "loss": 0.027, "step": 1092 }, { "clip_ratio/high_max": 0.001766547851730138, "clip_ratio/high_mean": 0.0006421936432161601, "clip_ratio/low_mean": 0.0007076363917803974, "clip_ratio/low_min": 0.00010495582228031708, "clip_ratio/region_mean": 0.0013498299995262641, "epoch": 0.10201307830063758, "grad_norm": 0.14460568130016327, "learning_rate": 2e-07, "loss": 0.0669, "step": 1093 }, { "clip_ratio/high_max": 0.00136424141965108, "clip_ratio/high_mean": 0.0005403467648648075, "clip_ratio/low_mean": 0.000626366099822917, "clip_ratio/low_min": 1.607096965017263e-05, "clip_ratio/region_mean": 0.0011667128565022722, "epoch": 0.10210641140063817, "grad_norm": 0.19492022693157196, "learning_rate": 2e-07, "loss": 0.0428, "step": 1094 }, { "clip_ratio/high_max": 0.0018926275733974762, "clip_ratio/high_mean": 0.0007042887791612884, "clip_ratio/low_mean": 0.0008054274203459499, "clip_ratio/low_min": 7.806214580341475e-05, "clip_ratio/region_mean": 0.001509716206783196, "epoch": 0.10219974450063875, "grad_norm": 0.45484256744384766, "learning_rate": 2e-07, "loss": 0.0508, "step": 1095 }, { "clip_ratio/high_max": 0.0016730453353375196, "clip_ratio/high_mean": 0.0007238493126351386, "clip_ratio/low_mean": 0.0007824249933037208, "clip_ratio/low_min": 6.230577309906948e-05, "clip_ratio/region_mean": 0.0015062742895679548, "epoch": 0.10229307760063933, "grad_norm": 0.19504110515117645, "learning_rate": 2e-07, "loss": 0.0564, "step": 1096 }, { "clip_ratio/high_max": 0.0016367621938115917, "clip_ratio/high_mean": 0.0006565264311575447, "clip_ratio/low_mean": 0.0006940491894056322, "clip_ratio/low_min": 5.03113533341093e-05, "clip_ratio/region_mean": 0.001350575632386608, "epoch": 0.10238641070063992, "grad_norm": 0.1701657623052597, "learning_rate": 2e-07, "loss": 0.0313, "step": 1097 }, { "clip_ratio/high_max": 0.0014567341258953093, "clip_ratio/high_mean": 0.000606280497777334, "clip_ratio/low_mean": 0.0007119195470295381, "clip_ratio/low_min": 7.631135667907074e-05, "clip_ratio/region_mean": 0.0013182000438973773, "epoch": 0.1024797438006405, "grad_norm": 0.16686387360095978, "learning_rate": 2e-07, "loss": 0.0671, "step": 1098 }, { "clip_ratio/high_max": 0.0017143516070063924, "clip_ratio/high_mean": 0.0007066034768286045, "clip_ratio/low_mean": 0.0007160510558605893, "clip_ratio/low_min": 3.743508477782598e-05, "clip_ratio/region_mean": 0.001422654513589805, "epoch": 0.10257307690064109, "grad_norm": 0.1850610077381134, "learning_rate": 2e-07, "loss": 0.0465, "step": 1099 }, { "clip_ratio/high_max": 0.001641176117118448, "clip_ratio/high_mean": 0.0006625574769714149, "clip_ratio/low_mean": 0.0005664665659423918, "clip_ratio/low_min": 8.286926458822563e-06, "clip_ratio/region_mean": 0.0012290240374568384, "epoch": 0.10266641000064167, "grad_norm": 0.15729431807994843, "learning_rate": 2e-07, "loss": -0.0214, "step": 1100 }, { "clip_ratio/high_max": 0.002012389933952363, "clip_ratio/high_mean": 0.0007761736633256078, "clip_ratio/low_mean": 0.0008028139891393948, "clip_ratio/low_min": 2.559916174504906e-05, "clip_ratio/region_mean": 0.001578987699758727, "epoch": 0.10275974310064225, "grad_norm": 0.17352890968322754, "learning_rate": 2e-07, "loss": 0.0186, "step": 1101 }, { "clip_ratio/high_max": 0.0017241790119442157, "clip_ratio/high_mean": 0.000714992094799527, "clip_ratio/low_mean": 0.0008037690622586524, "clip_ratio/low_min": 3.547297637851443e-05, "clip_ratio/region_mean": 0.0015187611934379674, "epoch": 0.10285307620064284, "grad_norm": 0.17286644876003265, "learning_rate": 2e-07, "loss": 0.0481, "step": 1102 }, { "clip_ratio/high_max": 0.001707520892523462, "clip_ratio/high_mean": 0.000703600146152894, "clip_ratio/low_mean": 0.0006809779943068861, "clip_ratio/low_min": 8.261368566309102e-05, "clip_ratio/region_mean": 0.0013845781249983702, "epoch": 0.10294640930064342, "grad_norm": 0.17523880302906036, "learning_rate": 2e-07, "loss": 0.0115, "step": 1103 }, { "clip_ratio/high_max": 0.001992955556488596, "clip_ratio/high_mean": 0.0007888808995630825, "clip_ratio/low_mean": 0.0006802679417887703, "clip_ratio/low_min": 4.4974053707846906e-05, "clip_ratio/region_mean": 0.0014691487922391389, "epoch": 0.103039742400644, "grad_norm": 0.15377196669578552, "learning_rate": 2e-07, "loss": -0.0228, "step": 1104 }, { "clip_ratio/high_max": 0.0017611919465707615, "clip_ratio/high_mean": 0.0007069728162605315, "clip_ratio/low_mean": 0.0006792380218030303, "clip_ratio/low_min": 8.428774526691996e-05, "clip_ratio/region_mean": 0.0013862108462490141, "epoch": 0.10313307550064459, "grad_norm": 0.17648661136627197, "learning_rate": 2e-07, "loss": -0.0061, "step": 1105 }, { "clip_ratio/high_max": 0.002141889137419639, "clip_ratio/high_mean": 0.0007625833859492559, "clip_ratio/low_mean": 0.0007133835661079502, "clip_ratio/low_min": 1.6414969650213607e-05, "clip_ratio/region_mean": 0.0014759669284103438, "epoch": 0.10322640860064516, "grad_norm": 0.1829444020986557, "learning_rate": 2e-07, "loss": 0.0316, "step": 1106 }, { "clip_ratio/high_max": 0.0018212226295872824, "clip_ratio/high_mean": 0.0006597093133677845, "clip_ratio/low_mean": 0.0006008648970237118, "clip_ratio/low_min": 3.7670837627956644e-05, "clip_ratio/region_mean": 0.0012605742122104857, "epoch": 0.10331974170064574, "grad_norm": 0.15556700527668, "learning_rate": 2e-07, "loss": -0.0026, "step": 1107 }, { "clip_ratio/high_max": 0.0016642345799482428, "clip_ratio/high_mean": 0.0006623064437007997, "clip_ratio/low_mean": 0.0006644109416811261, "clip_ratio/low_min": 1.240325491380645e-05, "clip_ratio/region_mean": 0.0013267173744679894, "epoch": 0.10341307480064633, "grad_norm": 0.17202286422252655, "learning_rate": 2e-07, "loss": 0.0375, "step": 1108 }, { "clip_ratio/high_max": 0.002002046487177722, "clip_ratio/high_mean": 0.0008412695588049246, "clip_ratio/low_mean": 0.0006451919844039367, "clip_ratio/low_min": 2.2739675841876306e-05, "clip_ratio/region_mean": 0.0014864615150145255, "epoch": 0.10350640790064691, "grad_norm": 0.18759506940841675, "learning_rate": 2e-07, "loss": -0.0077, "step": 1109 }, { "clip_ratio/high_max": 0.0018822737656591926, "clip_ratio/high_mean": 0.0006427048492696485, "clip_ratio/low_mean": 0.0008205131143768085, "clip_ratio/low_min": 8.546868457415258e-05, "clip_ratio/region_mean": 0.001463217951823026, "epoch": 0.1035997410006475, "grad_norm": 0.17778731882572174, "learning_rate": 2e-07, "loss": 0.0364, "step": 1110 }, { "clip_ratio/high_max": 0.0017383560407324694, "clip_ratio/high_mean": 0.0006919644165463978, "clip_ratio/low_mean": 0.0007098606256477069, "clip_ratio/low_min": 2.107077671098523e-05, "clip_ratio/region_mean": 0.0014018250367371365, "epoch": 0.10369307410064808, "grad_norm": 0.2000223845243454, "learning_rate": 2e-07, "loss": 0.0144, "step": 1111 }, { "clip_ratio/high_max": 0.0019446625155978836, "clip_ratio/high_mean": 0.0007200771960924612, "clip_ratio/low_mean": 0.0007276775231730426, "clip_ratio/low_min": 2.4101731469272636e-05, "clip_ratio/region_mean": 0.001447754715627525, "epoch": 0.10378640720064866, "grad_norm": 0.18549001216888428, "learning_rate": 2e-07, "loss": 0.0233, "step": 1112 }, { "clip_ratio/high_max": 0.001537330710561946, "clip_ratio/high_mean": 0.0006239347985683708, "clip_ratio/low_mean": 0.0007345005942624994, "clip_ratio/low_min": 4.676222533817054e-05, "clip_ratio/region_mean": 0.0013584354455815628, "epoch": 0.10387974030064925, "grad_norm": 0.23638804256916046, "learning_rate": 2e-07, "loss": 0.061, "step": 1113 }, { "clip_ratio/high_max": 0.0017658633842074778, "clip_ratio/high_mean": 0.0006900691842020024, "clip_ratio/low_mean": 0.0008230751318478724, "clip_ratio/low_min": 1.2465097825042903e-05, "clip_ratio/region_mean": 0.0015131443142308854, "epoch": 0.10397307340064983, "grad_norm": 0.20945629477500916, "learning_rate": 2e-07, "loss": 0.0856, "step": 1114 }, { "clip_ratio/high_max": 0.0017384473740094109, "clip_ratio/high_mean": 0.0007189507987277466, "clip_ratio/low_mean": 0.0007073538436088711, "clip_ratio/low_min": 6.58914814266609e-05, "clip_ratio/region_mean": 0.0014263046177802607, "epoch": 0.10406640650065041, "grad_norm": 0.19514112174510956, "learning_rate": 2e-07, "loss": -0.0075, "step": 1115 }, { "clip_ratio/high_max": 0.00187939171155449, "clip_ratio/high_mean": 0.0006622743076150073, "clip_ratio/low_mean": 0.0007319939122680807, "clip_ratio/low_min": 2.0377427972562145e-05, "clip_ratio/region_mean": 0.0013942682235210668, "epoch": 0.104159739600651, "grad_norm": 0.18565185368061066, "learning_rate": 2e-07, "loss": 0.0356, "step": 1116 }, { "clip_ratio/high_max": 0.0020871375454589725, "clip_ratio/high_mean": 0.0008166047882696148, "clip_ratio/low_mean": 0.0006817918165324954, "clip_ratio/low_min": 1.577486182213761e-05, "clip_ratio/region_mean": 0.0014983966320869513, "epoch": 0.10425307270065158, "grad_norm": 0.2912517488002777, "learning_rate": 2e-07, "loss": 0.0403, "step": 1117 }, { "clip_ratio/high_max": 0.0017056409305951092, "clip_ratio/high_mean": 0.0007120789287000662, "clip_ratio/low_mean": 0.0008236580524680903, "clip_ratio/low_min": 2.836718522303272e-05, "clip_ratio/region_mean": 0.001535737028461881, "epoch": 0.10434640580065216, "grad_norm": 0.19938957691192627, "learning_rate": 2e-07, "loss": 0.029, "step": 1118 }, { "clip_ratio/high_max": 0.001769970214809291, "clip_ratio/high_mean": 0.000710178956069285, "clip_ratio/low_mean": 0.0007748451625957387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014850241495878436, "epoch": 0.10443973890065275, "grad_norm": 0.34391912817955017, "learning_rate": 2e-07, "loss": 0.0379, "step": 1119 }, { "clip_ratio/high_max": 0.0017779520894691814, "clip_ratio/high_mean": 0.0007098604437487666, "clip_ratio/low_mean": 0.0008456395662506111, "clip_ratio/low_min": 2.8922179808432702e-05, "clip_ratio/region_mean": 0.0015554999699816108, "epoch": 0.10453307200065333, "grad_norm": 0.17450092732906342, "learning_rate": 2e-07, "loss": 0.0215, "step": 1120 }, { "clip_ratio/high_max": 0.0022155993938213214, "clip_ratio/high_mean": 0.0008303496852022363, "clip_ratio/low_mean": 0.0006930597101018066, "clip_ratio/low_min": 6.0944692449993454e-05, "clip_ratio/region_mean": 0.0015234094244078733, "epoch": 0.10462640510065392, "grad_norm": 0.22590035200119019, "learning_rate": 2e-07, "loss": -0.022, "step": 1121 }, { "clip_ratio/high_max": 0.002001709464821033, "clip_ratio/high_mean": 0.0007593416630697902, "clip_ratio/low_mean": 0.000666139983877656, "clip_ratio/low_min": 1.8341892427997664e-05, "clip_ratio/region_mean": 0.0014254816487664357, "epoch": 0.1047197382006545, "grad_norm": 0.16287989914417267, "learning_rate": 2e-07, "loss": 0.0096, "step": 1122 }, { "clip_ratio/high_max": 0.0015243976158672012, "clip_ratio/high_mean": 0.0006965569973544916, "clip_ratio/low_mean": 0.0007482387263735291, "clip_ratio/low_min": 4.736040500574745e-05, "clip_ratio/region_mean": 0.0014447957291849889, "epoch": 0.10481307130065508, "grad_norm": 0.18563826382160187, "learning_rate": 2e-07, "loss": 0.0294, "step": 1123 }, { "clip_ratio/high_max": 0.0020910659368382767, "clip_ratio/high_mean": 0.000823540220153518, "clip_ratio/low_mean": 0.0007974171621754067, "clip_ratio/low_min": 7.90133026384865e-05, "clip_ratio/region_mean": 0.0016209574023378082, "epoch": 0.10490640440065567, "grad_norm": 0.23470395803451538, "learning_rate": 2e-07, "loss": 0.0054, "step": 1124 }, { "clip_ratio/high_max": 0.001761332263413351, "clip_ratio/high_mean": 0.0007156250412663212, "clip_ratio/low_mean": 0.0009009028854052303, "clip_ratio/low_min": 2.2285612431005575e-05, "clip_ratio/region_mean": 0.0016165279594133608, "epoch": 0.10499973750065625, "grad_norm": 0.2094983160495758, "learning_rate": 2e-07, "loss": 0.0297, "step": 1125 }, { "clip_ratio/high_max": 0.0015303577783924993, "clip_ratio/high_mean": 0.0006309596046776278, "clip_ratio/low_mean": 0.0008965284941950813, "clip_ratio/low_min": 3.1694267818238586e-05, "clip_ratio/region_mean": 0.001527488089777762, "epoch": 0.10509307060065683, "grad_norm": 0.1982315480709076, "learning_rate": 2e-07, "loss": 0.0372, "step": 1126 }, { "clip_ratio/high_max": 0.0019632382827694528, "clip_ratio/high_mean": 0.0007322807578020729, "clip_ratio/low_mean": 0.0006698686320305569, "clip_ratio/low_min": 1.1208751857338939e-05, "clip_ratio/region_mean": 0.001402149413479492, "epoch": 0.10518640370065742, "grad_norm": 0.172402024269104, "learning_rate": 2e-07, "loss": 0.0207, "step": 1127 }, { "clip_ratio/high_max": 0.0017424712277716026, "clip_ratio/high_mean": 0.0007824306776456069, "clip_ratio/low_mean": 0.0007622028570040129, "clip_ratio/low_min": 4.890175569016719e-05, "clip_ratio/region_mean": 0.0015446335055457894, "epoch": 0.105279736800658, "grad_norm": 0.21946778893470764, "learning_rate": 2e-07, "loss": 0.0057, "step": 1128 }, { "clip_ratio/high_max": 0.0023641102452529594, "clip_ratio/high_mean": 0.0008045875656534918, "clip_ratio/low_mean": 0.0007482615528715542, "clip_ratio/low_min": 8.049485768424347e-05, "clip_ratio/region_mean": 0.0015528491567238234, "epoch": 0.10537306990065858, "grad_norm": 0.19112466275691986, "learning_rate": 2e-07, "loss": 0.0511, "step": 1129 }, { "clip_ratio/high_max": 0.0019674875256896485, "clip_ratio/high_mean": 0.0008085221561486833, "clip_ratio/low_mean": 0.0009789776304387487, "clip_ratio/low_min": 0.00016185067761398386, "clip_ratio/region_mean": 0.001787499786587432, "epoch": 0.10546640300065917, "grad_norm": 0.23398177325725555, "learning_rate": 2e-07, "loss": 0.0401, "step": 1130 }, { "clip_ratio/high_max": 0.0020986026611353736, "clip_ratio/high_mean": 0.0008616377217549598, "clip_ratio/low_mean": 0.0007748301904939581, "clip_ratio/low_min": 5.428488657344133e-05, "clip_ratio/region_mean": 0.0016364678958780132, "epoch": 0.10555973610065975, "grad_norm": 0.33156293630599976, "learning_rate": 2e-07, "loss": -0.0161, "step": 1131 }, { "clip_ratio/high_max": 0.0019223284252802841, "clip_ratio/high_mean": 0.0007875180253904546, "clip_ratio/low_mean": 0.0007947207832330605, "clip_ratio/low_min": 0.00012073359448550036, "clip_ratio/region_mean": 0.001582238735863939, "epoch": 0.10565306920066034, "grad_norm": 0.20530462265014648, "learning_rate": 2e-07, "loss": 0.0257, "step": 1132 }, { "clip_ratio/high_max": 0.0020733819001179654, "clip_ratio/high_mean": 0.0007886611747380812, "clip_ratio/low_mean": 0.0008109310183499474, "clip_ratio/low_min": 9.737064829096198e-05, "clip_ratio/region_mean": 0.0015995922294678167, "epoch": 0.10574640230066092, "grad_norm": 0.18688048422336578, "learning_rate": 2e-07, "loss": 0.0081, "step": 1133 }, { "clip_ratio/high_max": 0.002172916974814143, "clip_ratio/high_mean": 0.0008290860896522645, "clip_ratio/low_mean": 0.0009553911440889351, "clip_ratio/low_min": 3.9817372453399e-05, "clip_ratio/region_mean": 0.0017844772009993903, "epoch": 0.1058397354006615, "grad_norm": 0.22315232455730438, "learning_rate": 2e-07, "loss": 0.0062, "step": 1134 }, { "clip_ratio/high_max": 0.001972141813894268, "clip_ratio/high_mean": 0.0008083875654847361, "clip_ratio/low_mean": 0.0009335084541817196, "clip_ratio/low_min": 6.0593289163080044e-05, "clip_ratio/region_mean": 0.001741896012390498, "epoch": 0.10593306850066209, "grad_norm": 0.20710480213165283, "learning_rate": 2e-07, "loss": 0.0344, "step": 1135 }, { "clip_ratio/high_max": 0.0021570565149886534, "clip_ratio/high_mean": 0.0008218284074246185, "clip_ratio/low_mean": 0.0008251939470937941, "clip_ratio/low_min": 3.1045416108099744e-05, "clip_ratio/region_mean": 0.0016470223854412325, "epoch": 0.10602640160066266, "grad_norm": 0.1891990602016449, "learning_rate": 2e-07, "loss": 0.0141, "step": 1136 }, { "clip_ratio/high_max": 0.0017614163334656041, "clip_ratio/high_mean": 0.0007281454963958822, "clip_ratio/low_mean": 0.0008820163839118322, "clip_ratio/low_min": 3.826721331279259e-05, "clip_ratio/region_mean": 0.0016101618894026615, "epoch": 0.10611973470066324, "grad_norm": 0.18992166221141815, "learning_rate": 2e-07, "loss": 0.017, "step": 1137 }, { "clip_ratio/high_max": 0.0022039963005227037, "clip_ratio/high_mean": 0.0008524529403075576, "clip_ratio/low_mean": 0.0009391076455358416, "clip_ratio/low_min": 7.969427315401845e-05, "clip_ratio/region_mean": 0.001791560571291484, "epoch": 0.10621306780066384, "grad_norm": 0.20301789045333862, "learning_rate": 2e-07, "loss": 0.0247, "step": 1138 }, { "clip_ratio/high_max": 0.001920036505907774, "clip_ratio/high_mean": 0.0007118490293578361, "clip_ratio/low_mean": 0.0009270587270293618, "clip_ratio/low_min": 4.3461019231472164e-05, "clip_ratio/region_mean": 0.001638907786400523, "epoch": 0.10630640090066441, "grad_norm": 0.2717629373073578, "learning_rate": 2e-07, "loss": 0.0717, "step": 1139 }, { "clip_ratio/high_max": 0.0019034584765904583, "clip_ratio/high_mean": 0.0008074801407929044, "clip_ratio/low_mean": 0.0009448026539757848, "clip_ratio/low_min": 5.116897591506131e-05, "clip_ratio/region_mean": 0.0017522827838547528, "epoch": 0.10639973400066499, "grad_norm": 0.2513815462589264, "learning_rate": 2e-07, "loss": 0.0234, "step": 1140 }, { "clip_ratio/high_max": 0.001943527862749761, "clip_ratio/high_mean": 0.0007663091050744697, "clip_ratio/low_mean": 0.000956200664404605, "clip_ratio/low_min": 3.385154104762478e-05, "clip_ratio/region_mean": 0.0017225097726623062, "epoch": 0.10649306710066558, "grad_norm": 0.20724967122077942, "learning_rate": 2e-07, "loss": 0.0531, "step": 1141 }, { "clip_ratio/high_max": 0.0020496167890087236, "clip_ratio/high_mean": 0.0008580640715081245, "clip_ratio/low_mean": 0.0009527589772915235, "clip_ratio/low_min": 6.408581884898013e-05, "clip_ratio/region_mean": 0.0018108230287907645, "epoch": 0.10658640020066616, "grad_norm": 0.2833152413368225, "learning_rate": 2e-07, "loss": 0.0301, "step": 1142 }, { "clip_ratio/high_max": 0.002170580206438899, "clip_ratio/high_mean": 0.0008358599425264401, "clip_ratio/low_mean": 0.0010058011503133457, "clip_ratio/low_min": 0.0001087575797100726, "clip_ratio/region_mean": 0.0018416610328131355, "epoch": 0.10667973330066675, "grad_norm": 0.19138747453689575, "learning_rate": 2e-07, "loss": 0.0499, "step": 1143 }, { "clip_ratio/high_max": 0.002069901420327369, "clip_ratio/high_mean": 0.0007908684237918351, "clip_ratio/low_mean": 0.0007923690918687498, "clip_ratio/low_min": 3.8937671888561454e-05, "clip_ratio/region_mean": 0.0015832375138415955, "epoch": 0.10677306640066733, "grad_norm": 0.18338420987129211, "learning_rate": 2e-07, "loss": 0.0305, "step": 1144 }, { "clip_ratio/high_max": 0.002282039749843534, "clip_ratio/high_mean": 0.0008119083031488117, "clip_ratio/low_mean": 0.0008889147084119031, "clip_ratio/low_min": 5.3181777730060276e-05, "clip_ratio/region_mean": 0.0017008229697239585, "epoch": 0.10686639950066791, "grad_norm": 0.21848715841770172, "learning_rate": 2e-07, "loss": 0.0322, "step": 1145 }, { "clip_ratio/high_max": 0.0019890168769052252, "clip_ratio/high_mean": 0.0008314633214467904, "clip_ratio/low_mean": 0.0010063690697279526, "clip_ratio/low_min": 0.00013259013758215588, "clip_ratio/region_mean": 0.0018378323293291032, "epoch": 0.1069597326006685, "grad_norm": 0.2766454815864563, "learning_rate": 2e-07, "loss": 0.0752, "step": 1146 }, { "clip_ratio/high_max": 0.0021620424813590944, "clip_ratio/high_mean": 0.0009013813578349072, "clip_ratio/low_mean": 0.0009818292510317406, "clip_ratio/low_min": 7.670309787499718e-05, "clip_ratio/region_mean": 0.0018832106215995736, "epoch": 0.10705306570066908, "grad_norm": 0.2228546440601349, "learning_rate": 2e-07, "loss": 0.0089, "step": 1147 }, { "clip_ratio/high_max": 0.002279377687955275, "clip_ratio/high_mean": 0.0008424243824265432, "clip_ratio/low_mean": 0.0008772078363108449, "clip_ratio/low_min": 8.314251317642629e-05, "clip_ratio/region_mean": 0.0017196322296513245, "epoch": 0.10714639880066966, "grad_norm": 0.2301560491323471, "learning_rate": 2e-07, "loss": 0.0306, "step": 1148 }, { "clip_ratio/high_max": 0.002117458076099865, "clip_ratio/high_mean": 0.0009613349138817284, "clip_ratio/low_mean": 0.001009680585411843, "clip_ratio/low_min": 9.428792418475496e-05, "clip_ratio/region_mean": 0.001971015488379635, "epoch": 0.10723973190067025, "grad_norm": 0.35168761014938354, "learning_rate": 2e-07, "loss": 0.0301, "step": 1149 }, { "clip_ratio/high_max": 0.0018279521609656513, "clip_ratio/high_mean": 0.0007901179360487731, "clip_ratio/low_mean": 0.0010681857711460907, "clip_ratio/low_min": 0.00014211980123945978, "clip_ratio/region_mean": 0.0018583037162898108, "epoch": 0.10733306500067083, "grad_norm": 0.2210843563079834, "learning_rate": 2e-07, "loss": 0.0344, "step": 1150 }, { "clip_ratio/high_max": 0.002192511448811274, "clip_ratio/high_mean": 0.0009225484973285347, "clip_ratio/low_mean": 0.0009286007571063237, "clip_ratio/low_min": 5.789871829620097e-05, "clip_ratio/region_mean": 0.0018511492744437419, "epoch": 0.10742639810067142, "grad_norm": 0.37375393509864807, "learning_rate": 2e-07, "loss": 0.0197, "step": 1151 }, { "clip_ratio/high_max": 0.002026788191869855, "clip_ratio/high_mean": 0.0007743613568891305, "clip_ratio/low_mean": 0.0010105150704475818, "clip_ratio/low_min": 9.71479212239501e-05, "clip_ratio/region_mean": 0.0017848763818619773, "epoch": 0.107519731200672, "grad_norm": 0.23327729105949402, "learning_rate": 2e-07, "loss": 0.0185, "step": 1152 }, { "clip_ratio/high_max": 0.0019002621484105475, "clip_ratio/high_mean": 0.0006500720155599993, "clip_ratio/low_mean": 0.0006044011634003255, "clip_ratio/low_min": 2.437122202536557e-05, "clip_ratio/region_mean": 0.0012544731580419466, "completions/clipped_ratio": 0.015712193080357095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 656.460205078125, "completions/mean_terminated_length": 601.5548706054688, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.10761306430067258, "grad_norm": 0.19238293170928955, "learning_rate": 2e-07, "loss": 0.0171, "num_tokens": 852865584.0, "reward": 0.5996355414390564, "reward_std": 0.17621906101703644, "rewards/simpleverify_reward/mean": 0.5996355414390564, "rewards/simpleverify_reward/std": 0.48997434973716736, "step": 1153 }, { "clip_ratio/high_max": 0.0018897384725278243, "clip_ratio/high_mean": 0.000760492597692064, "clip_ratio/low_mean": 0.0006038603478373261, "clip_ratio/low_min": 4.387695298646577e-05, "clip_ratio/region_mean": 0.0013643529418914113, "epoch": 0.10770639740067317, "grad_norm": 0.20492057502269745, "learning_rate": 2e-07, "loss": -0.0486, "step": 1154 }, { "clip_ratio/high_max": 0.0017187149096571375, "clip_ratio/high_mean": 0.0007300019533431623, "clip_ratio/low_mean": 0.0005704309360226034, "clip_ratio/low_min": 2.6425207579450216e-05, "clip_ratio/region_mean": 0.001300432879361324, "epoch": 0.10779973050067375, "grad_norm": 0.1889190822839737, "learning_rate": 2e-07, "loss": 0.0121, "step": 1155 }, { "clip_ratio/high_max": 0.0016163798409252195, "clip_ratio/high_mean": 0.0006204793471624725, "clip_ratio/low_mean": 0.0006403167190001113, "clip_ratio/low_min": 1.3572203897638246e-05, "clip_ratio/region_mean": 0.0012607961034518667, "epoch": 0.10789306360067433, "grad_norm": 0.24750067293643951, "learning_rate": 2e-07, "loss": 0.0597, "step": 1156 }, { "clip_ratio/high_max": 0.001811950085539138, "clip_ratio/high_mean": 0.00066639986471273, "clip_ratio/low_mean": 0.0006902259792695986, "clip_ratio/low_min": 2.602916629257379e-05, "clip_ratio/region_mean": 0.0013566258421633393, "epoch": 0.10798639670067492, "grad_norm": 0.18738169968128204, "learning_rate": 2e-07, "loss": 0.0484, "step": 1157 }, { "clip_ratio/high_max": 0.001550313172629103, "clip_ratio/high_mean": 0.0006352991385938367, "clip_ratio/low_mean": 0.0005363966229197104, "clip_ratio/low_min": 7.243434174597496e-05, "clip_ratio/region_mean": 0.0011716957633325364, "epoch": 0.1080797298006755, "grad_norm": 0.1862281709909439, "learning_rate": 2e-07, "loss": 0.0125, "step": 1158 }, { "clip_ratio/high_max": 0.001688683551037684, "clip_ratio/high_mean": 0.0006329928164632292, "clip_ratio/low_mean": 0.0006799435850552982, "clip_ratio/low_min": 1.8463810192770325e-05, "clip_ratio/region_mean": 0.0013129363942425698, "epoch": 0.10817306290067608, "grad_norm": 0.17455722391605377, "learning_rate": 2e-07, "loss": 0.0346, "step": 1159 }, { "clip_ratio/high_max": 0.0019503886287566274, "clip_ratio/high_mean": 0.0007601020006404724, "clip_ratio/low_mean": 0.0006450956025219057, "clip_ratio/low_min": 9.329564090876374e-05, "clip_ratio/region_mean": 0.0014051976322662085, "epoch": 0.10826639600067667, "grad_norm": 0.1794072836637497, "learning_rate": 2e-07, "loss": 0.0426, "step": 1160 }, { "clip_ratio/high_max": 0.0014136854551907163, "clip_ratio/high_mean": 0.000550233527974342, "clip_ratio/low_mean": 0.0006850391746411333, "clip_ratio/low_min": 2.255142680951394e-05, "clip_ratio/region_mean": 0.0012352727171673905, "epoch": 0.10835972910067725, "grad_norm": 0.20358531177043915, "learning_rate": 2e-07, "loss": 0.0768, "step": 1161 }, { "clip_ratio/high_max": 0.0015740208255010657, "clip_ratio/high_mean": 0.0006260943364395644, "clip_ratio/low_mean": 0.0006881153221911518, "clip_ratio/low_min": 6.587641291844193e-05, "clip_ratio/region_mean": 0.0013142096613592003, "epoch": 0.10845306220067784, "grad_norm": 0.19229905307292938, "learning_rate": 2e-07, "loss": 0.0671, "step": 1162 }, { "clip_ratio/high_max": 0.0016955044611677295, "clip_ratio/high_mean": 0.0006433852768168435, "clip_ratio/low_mean": 0.0005789099323010305, "clip_ratio/low_min": 4.275378159945831e-05, "clip_ratio/region_mean": 0.0012222952173033264, "epoch": 0.10854639530067842, "grad_norm": 0.20612914860248566, "learning_rate": 2e-07, "loss": 0.0108, "step": 1163 }, { "clip_ratio/high_max": 0.0017925937536347192, "clip_ratio/high_mean": 0.0007002661423030077, "clip_ratio/low_mean": 0.0006234129841686809, "clip_ratio/low_min": 5.0848529099312145e-05, "clip_ratio/region_mean": 0.001323679131019162, "epoch": 0.108639728400679, "grad_norm": 0.19809873402118683, "learning_rate": 2e-07, "loss": 0.008, "step": 1164 }, { "clip_ratio/high_max": 0.0014247491762944264, "clip_ratio/high_mean": 0.0006378434991347603, "clip_ratio/low_mean": 0.0005748375770053826, "clip_ratio/low_min": 1.252003221452469e-05, "clip_ratio/region_mean": 0.0012126810906920582, "epoch": 0.10873306150067959, "grad_norm": 0.17962507903575897, "learning_rate": 2e-07, "loss": 0.0193, "step": 1165 }, { "clip_ratio/high_max": 0.001734929290250875, "clip_ratio/high_mean": 0.0006868735126772663, "clip_ratio/low_mean": 0.0006244975884328596, "clip_ratio/low_min": 3.598973034968367e-05, "clip_ratio/region_mean": 0.001311371110205073, "epoch": 0.10882639460068017, "grad_norm": 0.16323807835578918, "learning_rate": 2e-07, "loss": -0.0118, "step": 1166 }, { "clip_ratio/high_max": 0.0016580265000811778, "clip_ratio/high_mean": 0.0005845399855388678, "clip_ratio/low_mean": 0.0006818801812187303, "clip_ratio/low_min": 1.3584003681899048e-05, "clip_ratio/region_mean": 0.00126642014583922, "epoch": 0.10891972770068074, "grad_norm": 0.18846657872200012, "learning_rate": 2e-07, "loss": 0.0193, "step": 1167 }, { "clip_ratio/high_max": 0.0018638455439941026, "clip_ratio/high_mean": 0.0006850944500911282, "clip_ratio/low_mean": 0.0007037348332232796, "clip_ratio/low_min": 6.323905518001993e-05, "clip_ratio/region_mean": 0.0013888292924093548, "epoch": 0.10901306080068134, "grad_norm": 0.16910623013973236, "learning_rate": 2e-07, "loss": 0.0255, "step": 1168 }, { "clip_ratio/high_max": 0.00180743131932104, "clip_ratio/high_mean": 0.0006637911028519738, "clip_ratio/low_mean": 0.0005263801376713673, "clip_ratio/low_min": 1.6765021428000182e-05, "clip_ratio/region_mean": 0.0011901712459803093, "epoch": 0.10910639390068191, "grad_norm": 0.33869290351867676, "learning_rate": 2e-07, "loss": 0.0528, "step": 1169 }, { "clip_ratio/high_max": 0.00174643629725324, "clip_ratio/high_mean": 0.000640493006358156, "clip_ratio/low_mean": 0.0007910332915344043, "clip_ratio/low_min": 7.310097953450168e-05, "clip_ratio/region_mean": 0.0014315263033495285, "epoch": 0.10919972700068249, "grad_norm": 0.1833248883485794, "learning_rate": 2e-07, "loss": 0.0275, "step": 1170 }, { "clip_ratio/high_max": 0.0015297609716071747, "clip_ratio/high_mean": 0.0005749658475906472, "clip_ratio/low_mean": 0.0006821286942795268, "clip_ratio/low_min": 3.819135508820182e-05, "clip_ratio/region_mean": 0.0012570945291372482, "epoch": 0.10929306010068308, "grad_norm": 0.18745556473731995, "learning_rate": 2e-07, "loss": 0.0596, "step": 1171 }, { "clip_ratio/high_max": 0.0019604101762524806, "clip_ratio/high_mean": 0.0008546398858015891, "clip_ratio/low_mean": 0.0006497148024209309, "clip_ratio/low_min": 3.786220077017788e-05, "clip_ratio/region_mean": 0.0015043546845845412, "epoch": 0.10938639320068366, "grad_norm": 0.17636573314666748, "learning_rate": 2e-07, "loss": -0.0058, "step": 1172 }, { "clip_ratio/high_max": 0.0018086004092765506, "clip_ratio/high_mean": 0.0006847701824881369, "clip_ratio/low_mean": 0.0008049473399296403, "clip_ratio/low_min": 6.556480184372049e-05, "clip_ratio/region_mean": 0.0014897175133228302, "epoch": 0.10947972630068425, "grad_norm": 0.19096620380878448, "learning_rate": 2e-07, "loss": 0.0473, "step": 1173 }, { "clip_ratio/high_max": 0.0017770928861864377, "clip_ratio/high_mean": 0.0006894767902849708, "clip_ratio/low_mean": 0.0006302645597315859, "clip_ratio/low_min": 2.0465381112444447e-05, "clip_ratio/region_mean": 0.0013197413754824083, "epoch": 0.10957305940068483, "grad_norm": 0.20893307030200958, "learning_rate": 2e-07, "loss": 0.0178, "step": 1174 }, { "clip_ratio/high_max": 0.0016911543934838846, "clip_ratio/high_mean": 0.0005927487700319034, "clip_ratio/low_mean": 0.0006785031237086514, "clip_ratio/low_min": 3.034847122762585e-05, "clip_ratio/region_mean": 0.0012712519310298376, "epoch": 0.10966639250068541, "grad_norm": 0.21008335053920746, "learning_rate": 2e-07, "loss": 0.0643, "step": 1175 }, { "clip_ratio/high_max": 0.001634020129131386, "clip_ratio/high_mean": 0.0006209048897289904, "clip_ratio/low_mean": 0.0006434713050111895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00126437619474018, "epoch": 0.109759725600686, "grad_norm": 0.20011335611343384, "learning_rate": 2e-07, "loss": 0.0437, "step": 1176 }, { "clip_ratio/high_max": 0.0017903684783959761, "clip_ratio/high_mean": 0.0006056546844774857, "clip_ratio/low_mean": 0.0006902671630086843, "clip_ratio/low_min": 8.607829840912018e-05, "clip_ratio/region_mean": 0.001295921829296276, "epoch": 0.10985305870068658, "grad_norm": 0.17649950087070465, "learning_rate": 2e-07, "loss": 0.0388, "step": 1177 }, { "clip_ratio/high_max": 0.0016608631194685586, "clip_ratio/high_mean": 0.0006518034433611319, "clip_ratio/low_mean": 0.0006805760567658581, "clip_ratio/low_min": 2.285192022100091e-05, "clip_ratio/region_mean": 0.0013323795028554741, "epoch": 0.10994639180068716, "grad_norm": 0.19707824289798737, "learning_rate": 2e-07, "loss": 0.0451, "step": 1178 }, { "clip_ratio/high_max": 0.001679947799857473, "clip_ratio/high_mean": 0.0006905553500473616, "clip_ratio/low_mean": 0.0005742215962527553, "clip_ratio/low_min": 2.733165729296161e-05, "clip_ratio/region_mean": 0.0012647769908653572, "epoch": 0.11003972490068775, "grad_norm": 0.1662975698709488, "learning_rate": 2e-07, "loss": 0.0112, "step": 1179 }, { "clip_ratio/high_max": 0.0021835309962625615, "clip_ratio/high_mean": 0.0007844407446100377, "clip_ratio/low_mean": 0.0006690054578939453, "clip_ratio/low_min": 4.135922063142061e-05, "clip_ratio/region_mean": 0.0014534462316078134, "epoch": 0.11013305800068833, "grad_norm": 0.17501528561115265, "learning_rate": 2e-07, "loss": 0.0322, "step": 1180 }, { "clip_ratio/high_max": 0.0012176905438536778, "clip_ratio/high_mean": 0.0005316880015016068, "clip_ratio/low_mean": 0.0006258552421058994, "clip_ratio/low_min": 1.2475049516069703e-05, "clip_ratio/region_mean": 0.0011575432436075062, "epoch": 0.11022639110068891, "grad_norm": 0.19051356613636017, "learning_rate": 2e-07, "loss": 0.029, "step": 1181 }, { "clip_ratio/high_max": 0.0016504912528034765, "clip_ratio/high_mean": 0.0007145509316615062, "clip_ratio/low_mean": 0.0006099364782130579, "clip_ratio/low_min": 3.292226665507769e-05, "clip_ratio/region_mean": 0.001324487428064458, "epoch": 0.1103197242006895, "grad_norm": 0.18952608108520508, "learning_rate": 2e-07, "loss": 0.0054, "step": 1182 }, { "clip_ratio/high_max": 0.0017306630943494383, "clip_ratio/high_mean": 0.0006773532932129456, "clip_ratio/low_mean": 0.0006516767771245213, "clip_ratio/low_min": 4.406255902722478e-05, "clip_ratio/region_mean": 0.0013290300848893821, "epoch": 0.11041305730069008, "grad_norm": 0.6571484804153442, "learning_rate": 2e-07, "loss": 0.0374, "step": 1183 }, { "clip_ratio/high_max": 0.00160444683933747, "clip_ratio/high_mean": 0.0006001115707476856, "clip_ratio/low_mean": 0.0006514978431368945, "clip_ratio/low_min": 3.748431390704354e-05, "clip_ratio/region_mean": 0.0012516094284364954, "epoch": 0.11050639040069067, "grad_norm": 0.1889439970254898, "learning_rate": 2e-07, "loss": 0.0266, "step": 1184 }, { "clip_ratio/high_max": 0.0015110461863514502, "clip_ratio/high_mean": 0.0006771017433493398, "clip_ratio/low_mean": 0.0007011533907643752, "clip_ratio/low_min": 2.269559627166018e-05, "clip_ratio/region_mean": 0.0013782551213807892, "epoch": 0.11059972350069125, "grad_norm": 0.3795429766178131, "learning_rate": 2e-07, "loss": 0.0591, "step": 1185 }, { "clip_ratio/high_max": 0.0016939643828663975, "clip_ratio/high_mean": 0.0007446527306456119, "clip_ratio/low_mean": 0.0006936214413144626, "clip_ratio/low_min": 6.161633882584283e-05, "clip_ratio/region_mean": 0.0014382741392182652, "epoch": 0.11069305660069183, "grad_norm": 0.17510807514190674, "learning_rate": 2e-07, "loss": 0.0186, "step": 1186 }, { "clip_ratio/high_max": 0.0016916420354391448, "clip_ratio/high_mean": 0.0006877633022668306, "clip_ratio/low_mean": 0.0006478916893684072, "clip_ratio/low_min": 4.485635872697458e-05, "clip_ratio/region_mean": 0.0013356550007301848, "epoch": 0.11078638970069242, "grad_norm": 0.19438563287258148, "learning_rate": 2e-07, "loss": 0.0085, "step": 1187 }, { "clip_ratio/high_max": 0.001742381566145923, "clip_ratio/high_mean": 0.0006968320249143289, "clip_ratio/low_mean": 0.0007700505957473069, "clip_ratio/low_min": 2.0587669951055432e-05, "clip_ratio/region_mean": 0.001466882629756583, "epoch": 0.110879722800693, "grad_norm": 0.18888793885707855, "learning_rate": 2e-07, "loss": 0.0299, "step": 1188 }, { "clip_ratio/high_max": 0.0018652331600605976, "clip_ratio/high_mean": 0.0007013054255367024, "clip_ratio/low_mean": 0.0007786355727148475, "clip_ratio/low_min": 9.107377900363645e-05, "clip_ratio/region_mean": 0.0014799410018895287, "epoch": 0.11097305590069358, "grad_norm": 0.1984153538942337, "learning_rate": 2e-07, "loss": 0.0069, "step": 1189 }, { "clip_ratio/high_max": 0.0017905516324390192, "clip_ratio/high_mean": 0.0007216577887447784, "clip_ratio/low_mean": 0.0007792749565851409, "clip_ratio/low_min": 7.929065304779215e-05, "clip_ratio/region_mean": 0.0015009327544248663, "epoch": 0.11106638900069417, "grad_norm": 0.21126650273799896, "learning_rate": 2e-07, "loss": 0.0422, "step": 1190 }, { "clip_ratio/high_max": 0.002048670910880901, "clip_ratio/high_mean": 0.0007061627829898498, "clip_ratio/low_mean": 0.0006752127701474819, "clip_ratio/low_min": 6.775066867703572e-05, "clip_ratio/region_mean": 0.00138137554313289, "epoch": 0.11115972210069475, "grad_norm": 0.19271402060985565, "learning_rate": 2e-07, "loss": 0.0361, "step": 1191 }, { "clip_ratio/high_max": 0.001685273316979874, "clip_ratio/high_mean": 0.0006654012759099714, "clip_ratio/low_mean": 0.0006931820407771738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013585832784883678, "epoch": 0.11125305520069532, "grad_norm": 0.198637455701828, "learning_rate": 2e-07, "loss": 0.0042, "step": 1192 }, { "clip_ratio/high_max": 0.001722113858704688, "clip_ratio/high_mean": 0.0006931632315172465, "clip_ratio/low_mean": 0.0006796923298679758, "clip_ratio/low_min": 6.91401955918991e-05, "clip_ratio/region_mean": 0.001372855535009876, "epoch": 0.11134638830069592, "grad_norm": 0.18219062685966492, "learning_rate": 2e-07, "loss": 0.0074, "step": 1193 }, { "clip_ratio/high_max": 0.002046237208560342, "clip_ratio/high_mean": 0.0007659142074771808, "clip_ratio/low_mean": 0.0007704471181568806, "clip_ratio/low_min": 3.65534197044326e-05, "clip_ratio/region_mean": 0.001536361323815072, "epoch": 0.1114397214006965, "grad_norm": 0.2089175432920456, "learning_rate": 2e-07, "loss": 0.0592, "step": 1194 }, { "clip_ratio/high_max": 0.0019044145519728772, "clip_ratio/high_mean": 0.0007285121937457006, "clip_ratio/low_mean": 0.0007503263477701694, "clip_ratio/low_min": 3.495569853839697e-05, "clip_ratio/region_mean": 0.0014788385669817217, "epoch": 0.11153305450069709, "grad_norm": 0.27354249358177185, "learning_rate": 2e-07, "loss": 0.0253, "step": 1195 }, { "clip_ratio/high_max": 0.0019455743131402414, "clip_ratio/high_mean": 0.0006315421251201769, "clip_ratio/low_mean": 0.0006557920091836422, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012873341402155347, "epoch": 0.11162638760069767, "grad_norm": 0.20190535485744476, "learning_rate": 2e-07, "loss": 0.0308, "step": 1196 }, { "clip_ratio/high_max": 0.0018181820450990926, "clip_ratio/high_mean": 0.000710436375811696, "clip_ratio/low_mean": 0.0006786940184611012, "clip_ratio/low_min": 3.301770811958704e-05, "clip_ratio/region_mean": 0.001389130367897451, "epoch": 0.11171972070069824, "grad_norm": 0.2243601679801941, "learning_rate": 2e-07, "loss": 0.0351, "step": 1197 }, { "clip_ratio/high_max": 0.0019020231993636116, "clip_ratio/high_mean": 0.0007015543069428531, "clip_ratio/low_mean": 0.0006950501265237108, "clip_ratio/low_min": 2.49451204581419e-05, "clip_ratio/region_mean": 0.0013966044461994898, "epoch": 0.11181305380069884, "grad_norm": 0.2290676385164261, "learning_rate": 2e-07, "loss": 0.0718, "step": 1198 }, { "clip_ratio/high_max": 0.001877194405096816, "clip_ratio/high_mean": 0.0006861620950076031, "clip_ratio/low_mean": 0.0007457264309778111, "clip_ratio/low_min": 3.0743220122531056e-05, "clip_ratio/region_mean": 0.001431888493243605, "epoch": 0.11190638690069941, "grad_norm": 0.20612452924251556, "learning_rate": 2e-07, "loss": 0.022, "step": 1199 }, { "clip_ratio/high_max": 0.0015688790663261898, "clip_ratio/high_mean": 0.0006790409079258097, "clip_ratio/low_mean": 0.0007916213580756448, "clip_ratio/low_min": 8.639704719826113e-05, "clip_ratio/region_mean": 0.001470662267820444, "epoch": 0.11199972000069999, "grad_norm": 0.19049671292304993, "learning_rate": 2e-07, "loss": 0.0463, "step": 1200 }, { "clip_ratio/high_max": 0.0017373840091750026, "clip_ratio/high_mean": 0.0006893138870509574, "clip_ratio/low_mean": 0.0007801554856996518, "clip_ratio/low_min": 3.9624186683795415e-05, "clip_ratio/region_mean": 0.0014694693818455562, "epoch": 0.11209305310070058, "grad_norm": 1.0247437953948975, "learning_rate": 2e-07, "loss": 0.027, "step": 1201 }, { "clip_ratio/high_max": 0.0019171255153196398, "clip_ratio/high_mean": 0.0007347384416789282, "clip_ratio/low_mean": 0.0006977326065680245, "clip_ratio/low_min": 3.211882903997321e-05, "clip_ratio/region_mean": 0.0014324710573418997, "epoch": 0.11218638620070116, "grad_norm": 0.19784703850746155, "learning_rate": 2e-07, "loss": 0.0282, "step": 1202 }, { "clip_ratio/high_max": 0.0016217431548284367, "clip_ratio/high_mean": 0.0007107505480234977, "clip_ratio/low_mean": 0.0007161825051298365, "clip_ratio/low_min": 7.916795402707066e-05, "clip_ratio/region_mean": 0.0014269330167735461, "epoch": 0.11227971930070174, "grad_norm": 0.22832456231117249, "learning_rate": 2e-07, "loss": -0.0029, "step": 1203 }, { "clip_ratio/high_max": 0.0015659690252505243, "clip_ratio/high_mean": 0.0006774532503186492, "clip_ratio/low_mean": 0.0008246831148426281, "clip_ratio/low_min": 9.923786910803756e-05, "clip_ratio/region_mean": 0.0015021363651612774, "epoch": 0.11237305240070233, "grad_norm": 0.21064640581607819, "learning_rate": 2e-07, "loss": 0.0281, "step": 1204 }, { "clip_ratio/high_max": 0.0018564150959718972, "clip_ratio/high_mean": 0.0006792473377572605, "clip_ratio/low_mean": 0.0007403427280223696, "clip_ratio/low_min": 2.4791748728603125e-05, "clip_ratio/region_mean": 0.0014195900803315453, "epoch": 0.11246638550070291, "grad_norm": 0.19517509639263153, "learning_rate": 2e-07, "loss": 0.013, "step": 1205 }, { "clip_ratio/high_max": 0.001983896698220633, "clip_ratio/high_mean": 0.000836336228530854, "clip_ratio/low_mean": 0.0008409950914938236, "clip_ratio/low_min": 1.52587890625e-05, "clip_ratio/region_mean": 0.0016773313254816458, "epoch": 0.1125597186007035, "grad_norm": 0.24281498789787292, "learning_rate": 2e-07, "loss": 0.0092, "step": 1206 }, { "clip_ratio/high_max": 0.002194206572312396, "clip_ratio/high_mean": 0.0008013892238523113, "clip_ratio/low_mean": 0.000783125511588878, "clip_ratio/low_min": 0.000118052366815391, "clip_ratio/region_mean": 0.0015845147536310833, "epoch": 0.11265305170070408, "grad_norm": 0.20426036417484283, "learning_rate": 2e-07, "loss": -0.0153, "step": 1207 }, { "clip_ratio/high_max": 0.0015902157720120158, "clip_ratio/high_mean": 0.0006815677243139362, "clip_ratio/low_mean": 0.0007573332186439075, "clip_ratio/low_min": 5.3408847634273116e-05, "clip_ratio/region_mean": 0.0014389009411388543, "epoch": 0.11274638480070466, "grad_norm": 0.20530135929584503, "learning_rate": 2e-07, "loss": 0.0515, "step": 1208 }, { "clip_ratio/high_max": 0.0018439588639012072, "clip_ratio/high_mean": 0.0007493673329008743, "clip_ratio/low_mean": 0.0007487677066819742, "clip_ratio/low_min": 1.4742304301762488e-05, "clip_ratio/region_mean": 0.0014981350432208274, "epoch": 0.11283971790070525, "grad_norm": 0.20239122211933136, "learning_rate": 2e-07, "loss": 0.0709, "step": 1209 }, { "clip_ratio/high_max": 0.0018591665066196583, "clip_ratio/high_mean": 0.0007780405139783397, "clip_ratio/low_mean": 0.0007599300406582188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015379705546365585, "epoch": 0.11293305100070583, "grad_norm": 0.19646097719669342, "learning_rate": 2e-07, "loss": 0.0027, "step": 1210 }, { "clip_ratio/high_max": 0.0020803330444323365, "clip_ratio/high_mean": 0.0007651474443264306, "clip_ratio/low_mean": 0.0006152255646156846, "clip_ratio/low_min": 1.068924211722333e-05, "clip_ratio/region_mean": 0.0013803730071231257, "epoch": 0.11302638410070641, "grad_norm": 0.28694963455200195, "learning_rate": 2e-07, "loss": -0.0157, "step": 1211 }, { "clip_ratio/high_max": 0.0018078330758726224, "clip_ratio/high_mean": 0.0007627837821928551, "clip_ratio/low_mean": 0.00077972027247597, "clip_ratio/low_min": 8.40582079035812e-05, "clip_ratio/region_mean": 0.0015425040401169099, "epoch": 0.113119717200707, "grad_norm": 0.20485040545463562, "learning_rate": 2e-07, "loss": 0.0343, "step": 1212 }, { "clip_ratio/high_max": 0.0019466567609924823, "clip_ratio/high_mean": 0.0007239247051984421, "clip_ratio/low_mean": 0.0007246837212733226, "clip_ratio/low_min": 8.900138618628262e-05, "clip_ratio/region_mean": 0.0014486084037343971, "epoch": 0.11321305030070758, "grad_norm": 0.18759402632713318, "learning_rate": 2e-07, "loss": 0.0192, "step": 1213 }, { "clip_ratio/high_max": 0.0018263100428157486, "clip_ratio/high_mean": 0.000684896100210608, "clip_ratio/low_mean": 0.0008184543858078541, "clip_ratio/low_min": 5.668627272825688e-05, "clip_ratio/region_mean": 0.001503350489656441, "epoch": 0.11330638340070817, "grad_norm": 0.20482584834098816, "learning_rate": 2e-07, "loss": 0.0584, "step": 1214 }, { "clip_ratio/high_max": 0.0020009840300190262, "clip_ratio/high_mean": 0.0006999194465606706, "clip_ratio/low_mean": 0.0008477875817334279, "clip_ratio/low_min": 3.225402360840235e-05, "clip_ratio/region_mean": 0.0015477069828193635, "epoch": 0.11339971650070875, "grad_norm": 0.22164009511470795, "learning_rate": 2e-07, "loss": 0.08, "step": 1215 }, { "clip_ratio/high_max": 0.0020330445986473933, "clip_ratio/high_mean": 0.0008297775748360436, "clip_ratio/low_mean": 0.0008044165297178552, "clip_ratio/low_min": 2.7448397304397076e-05, "clip_ratio/region_mean": 0.0016341940936399624, "epoch": 0.11349304960070933, "grad_norm": 0.3707368075847626, "learning_rate": 2e-07, "loss": 0.0038, "step": 1216 }, { "clip_ratio/high_max": 0.002099720783007797, "clip_ratio/high_mean": 0.0008849017358443234, "clip_ratio/low_mean": 0.0008955090124800336, "clip_ratio/low_min": 8.088512367976364e-05, "clip_ratio/region_mean": 0.001780410711944569, "epoch": 0.11358638270070992, "grad_norm": 0.3196471333503723, "learning_rate": 2e-07, "loss": -0.005, "step": 1217 }, { "clip_ratio/high_max": 0.0021337432153814007, "clip_ratio/high_mean": 0.0008269978443422588, "clip_ratio/low_mean": 0.0008508307073498145, "clip_ratio/low_min": 5.074280579719925e-05, "clip_ratio/region_mean": 0.0016778285571490414, "epoch": 0.1136797158007105, "grad_norm": 0.3326951861381531, "learning_rate": 2e-07, "loss": 0.047, "step": 1218 }, { "clip_ratio/high_max": 0.002000450127525255, "clip_ratio/high_mean": 0.0008451689491266734, "clip_ratio/low_mean": 0.0006960581831663148, "clip_ratio/low_min": 0.00010058009047497762, "clip_ratio/region_mean": 0.0015412271532113664, "epoch": 0.11377304890071108, "grad_norm": 0.22375719249248505, "learning_rate": 2e-07, "loss": 0.0192, "step": 1219 }, { "clip_ratio/high_max": 0.002024498870014213, "clip_ratio/high_mean": 0.0008330712462338852, "clip_ratio/low_mean": 0.0007046140945021762, "clip_ratio/low_min": 1.7675340131972916e-05, "clip_ratio/region_mean": 0.0015376853116322309, "epoch": 0.11386638200071167, "grad_norm": 0.714872419834137, "learning_rate": 2e-07, "loss": 0.0318, "step": 1220 }, { "clip_ratio/high_max": 0.0020713857156806625, "clip_ratio/high_mean": 0.0007897193499957211, "clip_ratio/low_mean": 0.0009708024226711132, "clip_ratio/low_min": 7.287433254532516e-05, "clip_ratio/region_mean": 0.0017605217944947071, "epoch": 0.11395971510071225, "grad_norm": 0.4111100435256958, "learning_rate": 2e-07, "loss": 0.0545, "step": 1221 }, { "clip_ratio/high_max": 0.0020507228982751258, "clip_ratio/high_mean": 0.0007924263281893218, "clip_ratio/low_mean": 0.000870325202413369, "clip_ratio/low_min": 3.0282498300948646e-05, "clip_ratio/region_mean": 0.0016627515215077437, "epoch": 0.11405304820071283, "grad_norm": 0.22753064334392548, "learning_rate": 2e-07, "loss": 0.0142, "step": 1222 }, { "clip_ratio/high_max": 0.0019056183882639743, "clip_ratio/high_mean": 0.0007450067651006975, "clip_ratio/low_mean": 0.0008190507542167325, "clip_ratio/low_min": 5.385330860008253e-05, "clip_ratio/region_mean": 0.001564057485666126, "epoch": 0.11414638130071342, "grad_norm": 0.2049776017665863, "learning_rate": 2e-07, "loss": 0.0383, "step": 1223 }, { "clip_ratio/high_max": 0.0018856094902730547, "clip_ratio/high_mean": 0.0008538159509043908, "clip_ratio/low_mean": 0.0008123189745674608, "clip_ratio/low_min": 1.1398868991818745e-05, "clip_ratio/region_mean": 0.001666134900006, "epoch": 0.114239714400714, "grad_norm": 0.254228800535202, "learning_rate": 2e-07, "loss": -0.0342, "step": 1224 }, { "clip_ratio/high_max": 0.0018409898511890788, "clip_ratio/high_mean": 0.0007669665919820545, "clip_ratio/low_mean": 0.0007762550776533317, "clip_ratio/low_min": 6.346553163893986e-05, "clip_ratio/region_mean": 0.0015432217041961849, "epoch": 0.11433304750071459, "grad_norm": 0.19395624101161957, "learning_rate": 2e-07, "loss": 0.0313, "step": 1225 }, { "clip_ratio/high_max": 0.0017795385792851448, "clip_ratio/high_mean": 0.0006638207960349973, "clip_ratio/low_mean": 0.0008822498293739045, "clip_ratio/low_min": 4.22526900365483e-05, "clip_ratio/region_mean": 0.0015460706454177853, "epoch": 0.11442638060071517, "grad_norm": 0.24296225607395172, "learning_rate": 2e-07, "loss": 0.0737, "step": 1226 }, { "clip_ratio/high_max": 0.0017573572331457399, "clip_ratio/high_mean": 0.0007908931402198505, "clip_ratio/low_mean": 0.0008601462723163422, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001651039405260235, "epoch": 0.11451971370071574, "grad_norm": 0.2264319509267807, "learning_rate": 2e-07, "loss": 0.0174, "step": 1227 }, { "clip_ratio/high_max": 0.0022153178106236737, "clip_ratio/high_mean": 0.0009089283248613356, "clip_ratio/low_mean": 0.0008238453992817085, "clip_ratio/low_min": 1.305619389313506e-05, "clip_ratio/region_mean": 0.00173277373687597, "epoch": 0.11461304680071634, "grad_norm": 0.2209435999393463, "learning_rate": 2e-07, "loss": -0.0048, "step": 1228 }, { "clip_ratio/high_max": 0.0018664769741008058, "clip_ratio/high_mean": 0.0007184306732597179, "clip_ratio/low_mean": 0.000890397383045638, "clip_ratio/low_min": 8.530589002475608e-05, "clip_ratio/region_mean": 0.0016088280426629353, "epoch": 0.11470637990071691, "grad_norm": 0.22522087395191193, "learning_rate": 2e-07, "loss": 0.037, "step": 1229 }, { "clip_ratio/high_max": 0.0019802098504442256, "clip_ratio/high_mean": 0.000781096266109671, "clip_ratio/low_mean": 0.0007984326966834487, "clip_ratio/low_min": 7.862776419642614e-05, "clip_ratio/region_mean": 0.001579528943693731, "epoch": 0.1147997130007175, "grad_norm": 0.27973753213882446, "learning_rate": 2e-07, "loss": 0.0491, "step": 1230 }, { "clip_ratio/high_max": 0.0019337311714480165, "clip_ratio/high_mean": 0.000725992358638905, "clip_ratio/low_mean": 0.0009105347617150983, "clip_ratio/low_min": 8.868769236869412e-05, "clip_ratio/region_mean": 0.001636527136724908, "epoch": 0.11489304610071809, "grad_norm": 0.24194501340389252, "learning_rate": 2e-07, "loss": 0.0454, "step": 1231 }, { "clip_ratio/high_max": 0.001900083228974836, "clip_ratio/high_mean": 0.0007702012626396026, "clip_ratio/low_mean": 0.0009474192647758173, "clip_ratio/low_min": 6.311534752967418e-05, "clip_ratio/region_mean": 0.001717620536510367, "epoch": 0.11498637920071866, "grad_norm": 0.2186608910560608, "learning_rate": 2e-07, "loss": 0.0288, "step": 1232 }, { "clip_ratio/high_max": 0.0017413284804206342, "clip_ratio/high_mean": 0.0006760418291378301, "clip_ratio/low_mean": 0.0009517173966742121, "clip_ratio/low_min": 6.296036281128181e-05, "clip_ratio/region_mean": 0.0016277592367259786, "epoch": 0.11507971230071924, "grad_norm": 0.25317421555519104, "learning_rate": 2e-07, "loss": 0.0781, "step": 1233 }, { "clip_ratio/high_max": 0.001897019988973625, "clip_ratio/high_mean": 0.0007237795489345444, "clip_ratio/low_mean": 0.0010252933843730716, "clip_ratio/low_min": 0.00012560267714434303, "clip_ratio/region_mean": 0.0017490729442215525, "epoch": 0.11517304540071983, "grad_norm": 0.5316627025604248, "learning_rate": 2e-07, "loss": 0.0489, "step": 1234 }, { "clip_ratio/high_max": 0.002178489052312216, "clip_ratio/high_mean": 0.0008479114385409048, "clip_ratio/low_mean": 0.0009778711100807413, "clip_ratio/low_min": 5.8120580433751456e-05, "clip_ratio/region_mean": 0.0018257825940963812, "epoch": 0.11526637850072041, "grad_norm": 0.2706198990345001, "learning_rate": 2e-07, "loss": 0.0153, "step": 1235 }, { "clip_ratio/high_max": 0.0019815278028545436, "clip_ratio/high_mean": 0.0007798407968948595, "clip_ratio/low_mean": 0.0009915838309098035, "clip_ratio/low_min": 6.787551410525339e-05, "clip_ratio/region_mean": 0.0017714246205287054, "epoch": 0.115359711600721, "grad_norm": 0.27135396003723145, "learning_rate": 2e-07, "loss": 0.0433, "step": 1236 }, { "clip_ratio/high_max": 0.0022353520107571967, "clip_ratio/high_mean": 0.0009104317996389, "clip_ratio/low_mean": 0.0009735964158608112, "clip_ratio/low_min": 5.403306204243563e-05, "clip_ratio/region_mean": 0.0018840282427845523, "epoch": 0.11545304470072158, "grad_norm": 0.22792552411556244, "learning_rate": 2e-07, "loss": -0.0031, "step": 1237 }, { "clip_ratio/high_max": 0.0023469542757084128, "clip_ratio/high_mean": 0.0009543703545205062, "clip_ratio/low_mean": 0.0010238815448246896, "clip_ratio/low_min": 9.087378202821128e-05, "clip_ratio/region_mean": 0.001978251864784397, "epoch": 0.11554637780072216, "grad_norm": 0.2636658847332001, "learning_rate": 2e-07, "loss": 0.0291, "step": 1238 }, { "clip_ratio/high_max": 0.0020772560201294255, "clip_ratio/high_mean": 0.0008355400896107312, "clip_ratio/low_mean": 0.0009484115798841231, "clip_ratio/low_min": 5.048568709753454e-05, "clip_ratio/region_mean": 0.0017839516513049603, "epoch": 0.11563971090072275, "grad_norm": 0.2819897532463074, "learning_rate": 2e-07, "loss": 0.0111, "step": 1239 }, { "clip_ratio/high_max": 0.0021809357804158935, "clip_ratio/high_mean": 0.0008405055846196774, "clip_ratio/low_mean": 0.0010645882994140266, "clip_ratio/low_min": 0.00013331532500160392, "clip_ratio/region_mean": 0.0019050938208238222, "epoch": 0.11573304400072333, "grad_norm": 0.264545202255249, "learning_rate": 2e-07, "loss": 0.0279, "step": 1240 }, { "clip_ratio/high_max": 0.0022496925739687867, "clip_ratio/high_mean": 0.0009306902684329543, "clip_ratio/low_mean": 0.0011292188319202978, "clip_ratio/low_min": 0.00010190761440753704, "clip_ratio/region_mean": 0.0020599090930772945, "epoch": 0.11582637710072391, "grad_norm": 0.24554972350597382, "learning_rate": 2e-07, "loss": -0.0014, "step": 1241 }, { "clip_ratio/high_max": 0.0022390368467313237, "clip_ratio/high_mean": 0.000828416676085908, "clip_ratio/low_mean": 0.0010387472311776946, "clip_ratio/low_min": 3.668619046948152e-05, "clip_ratio/region_mean": 0.0018671639300009701, "epoch": 0.1159197102007245, "grad_norm": 0.22647197544574738, "learning_rate": 2e-07, "loss": 0.0207, "step": 1242 }, { "clip_ratio/high_max": 0.002203932628617622, "clip_ratio/high_mean": 0.0009421211771041271, "clip_ratio/low_mean": 0.0011247519796597771, "clip_ratio/low_min": 6.0320092416077387e-05, "clip_ratio/region_mean": 0.0020668731958721764, "epoch": 0.11601304330072508, "grad_norm": 0.2896707057952881, "learning_rate": 2e-07, "loss": -0.005, "step": 1243 }, { "clip_ratio/high_max": 0.0025299021872342564, "clip_ratio/high_mean": 0.0009991048427764326, "clip_ratio/low_mean": 0.0011080071853939444, "clip_ratio/low_min": 0.00010431809414512827, "clip_ratio/region_mean": 0.0021071120354463346, "epoch": 0.11610637640072566, "grad_norm": 0.3097783327102661, "learning_rate": 2e-07, "loss": 0.0091, "step": 1244 }, { "clip_ratio/high_max": 0.002371006085013505, "clip_ratio/high_mean": 0.0009061949276656378, "clip_ratio/low_mean": 0.0011595743635552935, "clip_ratio/low_min": 0.0001039442695400794, "clip_ratio/region_mean": 0.002065769280306995, "epoch": 0.11619970950072625, "grad_norm": 0.3064813017845154, "learning_rate": 2e-07, "loss": 0.0251, "step": 1245 }, { "clip_ratio/high_max": 0.0021198121685301885, "clip_ratio/high_mean": 0.000794113875599578, "clip_ratio/low_mean": 0.0010648207353369799, "clip_ratio/low_min": 2.5150904548354447e-05, "clip_ratio/region_mean": 0.0018589346218504943, "epoch": 0.11629304260072683, "grad_norm": 0.2669926583766937, "learning_rate": 2e-07, "loss": 0.0063, "step": 1246 }, { "clip_ratio/high_max": 0.0022307105900836177, "clip_ratio/high_mean": 0.0009355627607874339, "clip_ratio/low_mean": 0.0011419211368774995, "clip_ratio/low_min": 0.00011037421700166306, "clip_ratio/region_mean": 0.0020774839067598805, "epoch": 0.11638637570072742, "grad_norm": 0.36085245013237, "learning_rate": 2e-07, "loss": 0.0548, "step": 1247 }, { "clip_ratio/high_max": 0.002027619550062809, "clip_ratio/high_mean": 0.00095057726775849, "clip_ratio/low_mean": 0.0011686959514918271, "clip_ratio/low_min": 6.969452624616679e-05, "clip_ratio/region_mean": 0.0021192732092458755, "epoch": 0.116479708800728, "grad_norm": 0.4071381390094757, "learning_rate": 2e-07, "loss": 0.0299, "step": 1248 }, { "clip_ratio/high_max": 0.002617474030557787, "clip_ratio/high_mean": 0.0009693719202914508, "clip_ratio/low_mean": 0.0011475475312181516, "clip_ratio/low_min": 6.0778689658036456e-05, "clip_ratio/region_mean": 0.0021169194660615176, "epoch": 0.11657304190072858, "grad_norm": 0.33404529094696045, "learning_rate": 2e-07, "loss": 0.0039, "step": 1249 }, { "clip_ratio/high_max": 0.002880485095374752, "clip_ratio/high_mean": 0.0011259794227953535, "clip_ratio/low_mean": 0.0012842701544286683, "clip_ratio/low_min": 0.0001142452092608437, "clip_ratio/region_mean": 0.002410249595413916, "epoch": 0.11666637500072917, "grad_norm": 0.44753509759902954, "learning_rate": 2e-07, "loss": 0.0088, "step": 1250 }, { "clip_ratio/high_max": 0.0024234193697338924, "clip_ratio/high_mean": 0.0009820406485232525, "clip_ratio/low_mean": 0.0011096184953203192, "clip_ratio/low_min": 3.582346107577905e-05, "clip_ratio/region_mean": 0.0020916591529385187, "epoch": 0.11675970810072975, "grad_norm": 0.344949871301651, "learning_rate": 2e-07, "loss": -0.0198, "step": 1251 }, { "clip_ratio/high_max": 0.00233278968698869, "clip_ratio/high_mean": 0.0010195172853855183, "clip_ratio/low_mean": 0.0014176431031955872, "clip_ratio/low_min": 2.1950244445179123e-05, "clip_ratio/region_mean": 0.0024371604013140313, "epoch": 0.11685304120073033, "grad_norm": 0.32029810547828674, "learning_rate": 2e-07, "loss": 0.0344, "step": 1252 }, { "clip_ratio/high_max": 0.0024745127229834907, "clip_ratio/high_mean": 0.0009009738223539898, "clip_ratio/low_mean": 0.001625277058337815, "clip_ratio/low_min": 0.0002985634873766685, "clip_ratio/region_mean": 0.002526250886148773, "epoch": 0.11694637430073092, "grad_norm": 0.46875813603401184, "learning_rate": 2e-07, "loss": 0.0756, "step": 1253 }, { "clip_ratio/high_max": 0.002519101166399196, "clip_ratio/high_mean": 0.0010257409157929942, "clip_ratio/low_mean": 0.0011413073443691246, "clip_ratio/low_min": 6.12545636613504e-05, "clip_ratio/region_mean": 0.0021670482674380764, "epoch": 0.1170397074007315, "grad_norm": 0.3189603090286255, "learning_rate": 2e-07, "loss": 0.0067, "step": 1254 }, { "clip_ratio/high_max": 0.002456131602230016, "clip_ratio/high_mean": 0.0009900657387333922, "clip_ratio/low_mean": 0.001537556938274065, "clip_ratio/low_min": 0.00010477024534338852, "clip_ratio/region_mean": 0.002527622658817563, "epoch": 0.11713304050073207, "grad_norm": 0.3813420534133911, "learning_rate": 2e-07, "loss": 0.0401, "step": 1255 }, { "clip_ratio/high_max": 0.0022081926217651926, "clip_ratio/high_mean": 0.0009666876030678395, "clip_ratio/low_mean": 0.0014399002284335438, "clip_ratio/low_min": 0.00020513307026703842, "clip_ratio/region_mean": 0.0024065878460532986, "epoch": 0.11722637360073267, "grad_norm": 0.504207193851471, "learning_rate": 2e-07, "loss": -0.0039, "step": 1256 }, { "clip_ratio/high_max": 0.0024596137373009697, "clip_ratio/high_mean": 0.000986370921964408, "clip_ratio/low_mean": 0.0015320670936489478, "clip_ratio/low_min": 6.149632781671244e-05, "clip_ratio/region_mean": 0.0025184379919664934, "epoch": 0.11731970670073325, "grad_norm": 0.46205493807792664, "learning_rate": 2e-07, "loss": 0.0752, "step": 1257 }, { "clip_ratio/high_max": 0.0025536917528370395, "clip_ratio/high_mean": 0.0011049027634726372, "clip_ratio/low_mean": 0.0016272512002615258, "clip_ratio/low_min": 0.00017271660908591002, "clip_ratio/region_mean": 0.002732154040131718, "epoch": 0.11741303980073384, "grad_norm": 0.3714674413204193, "learning_rate": 2e-07, "loss": 0.0302, "step": 1258 }, { "clip_ratio/high_max": 0.0025925065856426954, "clip_ratio/high_mean": 0.0010730062967923004, "clip_ratio/low_mean": 0.0016265659724012949, "clip_ratio/low_min": 1.552795038151089e-05, "clip_ratio/region_mean": 0.0026995722073479556, "epoch": 0.11750637290073442, "grad_norm": 0.7046552300453186, "learning_rate": 2e-07, "loss": -0.0023, "step": 1259 }, { "clip_ratio/high_max": 0.002735358186328085, "clip_ratio/high_mean": 0.001097457914511324, "clip_ratio/low_mean": 0.0015978077572071925, "clip_ratio/low_min": 9.329971271654358e-05, "clip_ratio/region_mean": 0.0026952657208312303, "epoch": 0.117599706000735, "grad_norm": 0.48868218064308167, "learning_rate": 2e-07, "loss": 0.032, "step": 1260 }, { "clip_ratio/high_max": 0.0026208938434137963, "clip_ratio/high_mean": 0.001101096404454438, "clip_ratio/low_mean": 0.001882253673102241, "clip_ratio/low_min": 0.000211413343095046, "clip_ratio/region_mean": 0.0029833500520908274, "epoch": 0.11769303910073559, "grad_norm": 2.799860954284668, "learning_rate": 2e-07, "loss": 0.0323, "step": 1261 }, { "clip_ratio/high_max": 0.0031540299460175447, "clip_ratio/high_mean": 0.001206015947900596, "clip_ratio/low_mean": 0.0017915620264830068, "clip_ratio/low_min": 7.583280057588127e-05, "clip_ratio/region_mean": 0.0029975780416862108, "epoch": 0.11778637220073616, "grad_norm": 0.7450904846191406, "learning_rate": 2e-07, "loss": 0.051, "step": 1262 }, { "clip_ratio/high_max": 0.003167201590258628, "clip_ratio/high_mean": 0.0012370877011562698, "clip_ratio/low_mean": 0.001741114494507201, "clip_ratio/low_min": 0.00010033375656348653, "clip_ratio/region_mean": 0.002978202188387513, "epoch": 0.11787970530073674, "grad_norm": 0.38404929637908936, "learning_rate": 2e-07, "loss": 0.0149, "step": 1263 }, { "clip_ratio/high_max": 0.0028844203043263406, "clip_ratio/high_mean": 0.0011909350942005403, "clip_ratio/low_mean": 0.0018382909038336948, "clip_ratio/low_min": 0.000245137669480755, "clip_ratio/region_mean": 0.003029225998034235, "epoch": 0.11797303840073733, "grad_norm": 0.4256778061389923, "learning_rate": 2e-07, "loss": -0.0039, "step": 1264 }, { "clip_ratio/high_max": 0.0028122517105657607, "clip_ratio/high_mean": 0.0012796037590305787, "clip_ratio/low_mean": 0.0017838043859228492, "clip_ratio/low_min": 0.0001962625829037279, "clip_ratio/region_mean": 0.0030634080758318305, "epoch": 0.11806637150073791, "grad_norm": 0.5721672773361206, "learning_rate": 2e-07, "loss": 0.0035, "step": 1265 }, { "clip_ratio/high_max": 0.002727821694861632, "clip_ratio/high_mean": 0.0011192643833055627, "clip_ratio/low_mean": 0.0017919084712048061, "clip_ratio/low_min": 7.88146280683577e-05, "clip_ratio/region_mean": 0.0029111727853887714, "epoch": 0.1181597046007385, "grad_norm": 0.5882701873779297, "learning_rate": 2e-07, "loss": 0.0431, "step": 1266 }, { "clip_ratio/high_max": 0.003068051693844609, "clip_ratio/high_mean": 0.0013537121703848243, "clip_ratio/low_mean": 0.002128494350472465, "clip_ratio/low_min": 0.0001546155799587723, "clip_ratio/region_mean": 0.0034822066299966536, "epoch": 0.11825303770073908, "grad_norm": 0.5249460339546204, "learning_rate": 2e-07, "loss": 0.0302, "step": 1267 }, { "clip_ratio/high_max": 0.00276425524498336, "clip_ratio/high_mean": 0.0010690280796552543, "clip_ratio/low_mean": 0.0020647762939915992, "clip_ratio/low_min": 0.00010719466808950529, "clip_ratio/region_mean": 0.0031338044645963237, "epoch": 0.11834637080073966, "grad_norm": 0.4720554053783417, "learning_rate": 2e-07, "loss": 0.0449, "step": 1268 }, { "clip_ratio/high_max": 0.0036265780217945576, "clip_ratio/high_mean": 0.0014240723357943352, "clip_ratio/low_mean": 0.001985043694730848, "clip_ratio/low_min": 4.992012691218406e-05, "clip_ratio/region_mean": 0.0034091159905074164, "epoch": 0.11843970390074025, "grad_norm": 0.5207564234733582, "learning_rate": 2e-07, "loss": -0.0582, "step": 1269 }, { "clip_ratio/high_max": 0.0033109773212345317, "clip_ratio/high_mean": 0.001432134238712024, "clip_ratio/low_mean": 0.0018958529108203948, "clip_ratio/low_min": 7.71102168073412e-05, "clip_ratio/region_mean": 0.003327987200464122, "epoch": 0.11853303700074083, "grad_norm": 0.7054007053375244, "learning_rate": 2e-07, "loss": -0.0191, "step": 1270 }, { "clip_ratio/high_max": 0.0032682332530384883, "clip_ratio/high_mean": 0.001393673795973882, "clip_ratio/low_mean": 0.0021051197982160375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003498793550534174, "epoch": 0.11862637010074141, "grad_norm": 0.6108861565589905, "learning_rate": 2e-07, "loss": 0.0299, "step": 1271 }, { "clip_ratio/high_max": 0.002487627265509218, "clip_ratio/high_mean": 0.0011024959057976957, "clip_ratio/low_mean": 0.0022015903850842733, "clip_ratio/low_min": 0.00026101561161340214, "clip_ratio/region_mean": 0.0033040861453628168, "epoch": 0.118719703200742, "grad_norm": 1.3555997610092163, "learning_rate": 2e-07, "loss": 0.0836, "step": 1272 }, { "clip_ratio/high_max": 0.002986916821100749, "clip_ratio/high_mean": 0.0013136349589331076, "clip_ratio/low_mean": 0.0024058460985543206, "clip_ratio/low_min": 0.00019249199249316007, "clip_ratio/region_mean": 0.003719481101143174, "epoch": 0.11881303630074258, "grad_norm": 0.7471872568130493, "learning_rate": 2e-07, "loss": 0.0094, "step": 1273 }, { "clip_ratio/high_max": 0.002896909831179073, "clip_ratio/high_mean": 0.0012772270256391494, "clip_ratio/low_mean": 0.0026320864853914827, "clip_ratio/low_min": 0.0003585569720598869, "clip_ratio/region_mean": 0.003909313600161113, "epoch": 0.11890636940074316, "grad_norm": 2.183903694152832, "learning_rate": 2e-07, "loss": 0.0237, "step": 1274 }, { "clip_ratio/high_max": 0.0030761242160224356, "clip_ratio/high_mean": 0.0012256988757144427, "clip_ratio/low_mean": 0.002636407130921725, "clip_ratio/low_min": 0.0001596409874764504, "clip_ratio/region_mean": 0.0038621058483840898, "epoch": 0.11899970250074375, "grad_norm": 0.8792387843132019, "learning_rate": 2e-07, "loss": 0.027, "step": 1275 }, { "clip_ratio/high_max": 0.003158247818646487, "clip_ratio/high_mean": 0.001368365679809358, "clip_ratio/low_mean": 0.002628737478516996, "clip_ratio/low_min": 0.0002497493005648721, "clip_ratio/region_mean": 0.003997103223809972, "epoch": 0.11909303560074433, "grad_norm": 0.6177733540534973, "learning_rate": 2e-07, "loss": 0.0388, "step": 1276 }, { "clip_ratio/high_max": 0.0032810780394356698, "clip_ratio/high_mean": 0.0013804125264869072, "clip_ratio/low_mean": 0.00288650539732771, "clip_ratio/low_min": 0.00020459869847400114, "clip_ratio/region_mean": 0.004266917865606956, "epoch": 0.11918636870074492, "grad_norm": 1.0134904384613037, "learning_rate": 2e-07, "loss": 0.0349, "step": 1277 }, { "clip_ratio/high_max": 0.002939303609309718, "clip_ratio/high_mean": 0.0012806854101654608, "clip_ratio/low_mean": 0.0030248586590460036, "clip_ratio/low_min": 0.00018192952848039567, "clip_ratio/region_mean": 0.004305544163798913, "epoch": 0.1192797018007455, "grad_norm": 11.782687187194824, "learning_rate": 2e-07, "loss": 0.0426, "step": 1278 }, { "clip_ratio/high_max": 0.00342243084742222, "clip_ratio/high_mean": 0.00147415674655349, "clip_ratio/low_mean": 0.0028162228991277516, "clip_ratio/low_min": 0.00022035415895516053, "clip_ratio/region_mean": 0.004290379656595178, "epoch": 0.11937303490074608, "grad_norm": 1.0729273557662964, "learning_rate": 2e-07, "loss": 0.0749, "step": 1279 }, { "clip_ratio/high_max": 0.0031264541103155352, "clip_ratio/high_mean": 0.001497796238254523, "clip_ratio/low_mean": 0.0035184046646463685, "clip_ratio/low_min": 0.0007855840085539967, "clip_ratio/region_mean": 0.005016200899262913, "epoch": 0.11946636800074667, "grad_norm": 2.210545539855957, "learning_rate": 2e-07, "loss": 0.0771, "step": 1280 }, { "clip_ratio/high_max": 0.0023413691742462106, "clip_ratio/high_mean": 0.0010135633710888214, "clip_ratio/low_mean": 0.0009388794896949548, "clip_ratio/low_min": 4.8965136556944344e-05, "clip_ratio/region_mean": 0.0019524428353179246, "completions/clipped_ratio": 0.0215541294642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 671.4013671875, "completions/mean_terminated_length": 595.9610595703125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.11955970110074725, "grad_norm": 0.3834778070449829, "learning_rate": 2e-07, "loss": 0.0525, "num_tokens": 941233261.0, "reward": 0.5243879556655884, "reward_std": 0.21293023228645325, "rewards/simpleverify_reward/mean": 0.5243878960609436, "rewards/simpleverify_reward/std": 0.49940696358680725, "step": 1281 }, { "clip_ratio/high_max": 0.0020347327226772904, "clip_ratio/high_mean": 0.0009602001373423263, "clip_ratio/low_mean": 0.001020225347019732, "clip_ratio/low_min": 0.00013646185561810853, "clip_ratio/region_mean": 0.0019804254843620583, "epoch": 0.11965303420074783, "grad_norm": 0.2966500520706177, "learning_rate": 2e-07, "loss": 0.0475, "step": 1282 }, { "clip_ratio/high_max": 0.002555000246502459, "clip_ratio/high_mean": 0.0010961001044051955, "clip_ratio/low_mean": 0.0009353373934573028, "clip_ratio/low_min": 8.127438377414364e-06, "clip_ratio/region_mean": 0.002031437477853615, "epoch": 0.11974636730074842, "grad_norm": 0.33522093296051025, "learning_rate": 2e-07, "loss": 0.0165, "step": 1283 }, { "clip_ratio/high_max": 0.00230565297533758, "clip_ratio/high_mean": 0.0009652054832258727, "clip_ratio/low_mean": 0.0010196077964792494, "clip_ratio/low_min": 0.0001025043020490557, "clip_ratio/region_mean": 0.0019848132506012917, "epoch": 0.119839700400749, "grad_norm": 0.5380664467811584, "learning_rate": 2e-07, "loss": 0.0765, "step": 1284 }, { "clip_ratio/high_max": 0.002197891481046099, "clip_ratio/high_mean": 0.0009522697691863868, "clip_ratio/low_mean": 0.0011757086867874023, "clip_ratio/low_min": 0.00015661427096347325, "clip_ratio/region_mean": 0.0021279784268699586, "epoch": 0.11993303350074958, "grad_norm": 0.43300870060920715, "learning_rate": 2e-07, "loss": 0.0827, "step": 1285 }, { "clip_ratio/high_max": 0.002477598929544911, "clip_ratio/high_mean": 0.0009411834453203483, "clip_ratio/low_mean": 0.000982248118816642, "clip_ratio/low_min": 8.634273399366066e-05, "clip_ratio/region_mean": 0.0019234315950598102, "epoch": 0.12002636660075017, "grad_norm": 0.35375577211380005, "learning_rate": 2e-07, "loss": 0.039, "step": 1286 }, { "clip_ratio/high_max": 0.002343535757972859, "clip_ratio/high_mean": 0.0008841321068757679, "clip_ratio/low_mean": 0.0011539738879946526, "clip_ratio/low_min": 0.00014537928745994577, "clip_ratio/region_mean": 0.0020381060094223358, "epoch": 0.12011969970075075, "grad_norm": 0.33422353863716125, "learning_rate": 2e-07, "loss": 0.0873, "step": 1287 }, { "clip_ratio/high_max": 0.0025650129027781077, "clip_ratio/high_mean": 0.0011427720564824995, "clip_ratio/low_mean": 0.001230161615239922, "clip_ratio/low_min": 0.00011141854884044733, "clip_ratio/region_mean": 0.0023729336680844426, "epoch": 0.12021303280075134, "grad_norm": 4.751442909240723, "learning_rate": 2e-07, "loss": 0.0397, "step": 1288 }, { "clip_ratio/high_max": 0.002297170529345749, "clip_ratio/high_mean": 0.0009166823238047073, "clip_ratio/low_mean": 0.001126035705965478, "clip_ratio/low_min": 7.653410921193426e-05, "clip_ratio/region_mean": 0.0020427180206752382, "epoch": 0.12030636590075192, "grad_norm": 0.32877787947654724, "learning_rate": 2e-07, "loss": 0.059, "step": 1289 }, { "clip_ratio/high_max": 0.0021408290158433374, "clip_ratio/high_mean": 0.0008801896019576816, "clip_ratio/low_mean": 0.0010973331482091453, "clip_ratio/low_min": 0.00011789333439082839, "clip_ratio/region_mean": 0.0019775227265199646, "epoch": 0.1203996990007525, "grad_norm": 0.4919995665550232, "learning_rate": 2e-07, "loss": 0.1112, "step": 1290 }, { "clip_ratio/high_max": 0.0022090027123340406, "clip_ratio/high_mean": 0.0008994097606773721, "clip_ratio/low_mean": 0.0011504152298584813, "clip_ratio/low_min": 9.001138550956966e-05, "clip_ratio/region_mean": 0.002049825052381493, "epoch": 0.12049303210075309, "grad_norm": 0.5051014423370361, "learning_rate": 2e-07, "loss": 0.067, "step": 1291 }, { "clip_ratio/high_max": 0.0023545578005723655, "clip_ratio/high_mean": 0.0008758909316384234, "clip_ratio/low_mean": 0.0013859239952580538, "clip_ratio/low_min": 0.00016144527398864739, "clip_ratio/region_mean": 0.0022618149523623288, "epoch": 0.12058636520075366, "grad_norm": 115.59944915771484, "learning_rate": 2e-07, "loss": 0.197, "step": 1292 }, { "clip_ratio/high_max": 0.0018417898900224827, "clip_ratio/high_mean": 0.0007766663547954522, "clip_ratio/low_mean": 0.0012268044338270556, "clip_ratio/low_min": 0.00018717734928941354, "clip_ratio/region_mean": 0.002003470784984529, "epoch": 0.12067969830075424, "grad_norm": 0.4201187789440155, "learning_rate": 2e-07, "loss": 0.1337, "step": 1293 }, { "clip_ratio/high_max": 0.0024212099888245575, "clip_ratio/high_mean": 0.0009775580874702428, "clip_ratio/low_mean": 0.0012244309909874573, "clip_ratio/low_min": 0.00010023408685810864, "clip_ratio/region_mean": 0.0022019890529918484, "epoch": 0.12077303140075484, "grad_norm": 0.41988644003868103, "learning_rate": 2e-07, "loss": 0.0727, "step": 1294 }, { "clip_ratio/high_max": 0.0020032688044011593, "clip_ratio/high_mean": 0.0008800951563898707, "clip_ratio/low_mean": 0.001249365639523603, "clip_ratio/low_min": 0.0001791750446500373, "clip_ratio/region_mean": 0.00212946083775023, "epoch": 0.12086636450075541, "grad_norm": 0.37629029154777527, "learning_rate": 2e-07, "loss": 0.0758, "step": 1295 }, { "clip_ratio/high_max": 0.002368358167586848, "clip_ratio/high_mean": 0.0010499121453904081, "clip_ratio/low_mean": 0.001326764813711634, "clip_ratio/low_min": 0.0001260784019905259, "clip_ratio/region_mean": 0.0023766769736539572, "epoch": 0.12095969760075599, "grad_norm": 0.5631283521652222, "learning_rate": 2e-07, "loss": 0.0986, "step": 1296 }, { "clip_ratio/high_max": 0.0028794024510716554, "clip_ratio/high_mean": 0.001141504870247445, "clip_ratio/low_mean": 0.0011226633050682722, "clip_ratio/low_min": 5.687962402589619e-05, "clip_ratio/region_mean": 0.002264168178953696, "epoch": 0.12105303070075658, "grad_norm": 0.3497691750526428, "learning_rate": 2e-07, "loss": -0.0331, "step": 1297 }, { "clip_ratio/high_max": 0.002290142307174392, "clip_ratio/high_mean": 0.0010665492409316357, "clip_ratio/low_mean": 0.0013688798935618252, "clip_ratio/low_min": 0.00015799380162206944, "clip_ratio/region_mean": 0.00243542916723527, "epoch": 0.12114636380075716, "grad_norm": 2.3204498291015625, "learning_rate": 2e-07, "loss": 0.0627, "step": 1298 }, { "clip_ratio/high_max": 0.0025484256220806856, "clip_ratio/high_mean": 0.0010651455777406227, "clip_ratio/low_mean": 0.0012972953445569146, "clip_ratio/low_min": 0.00012076652819814626, "clip_ratio/region_mean": 0.0023624409150215797, "epoch": 0.12123969690075775, "grad_norm": 1.3894912004470825, "learning_rate": 2e-07, "loss": 0.1373, "step": 1299 }, { "clip_ratio/high_max": 0.0024318516516359523, "clip_ratio/high_mean": 0.0010606912401271984, "clip_ratio/low_mean": 0.0014953310856071766, "clip_ratio/low_min": 0.00025038547573785763, "clip_ratio/region_mean": 0.0025560223875800148, "epoch": 0.12133303000075833, "grad_norm": 1.6366043090820312, "learning_rate": 2e-07, "loss": 0.0487, "step": 1300 }, { "clip_ratio/high_max": 0.002639939477376174, "clip_ratio/high_mean": 0.0010795494199555833, "clip_ratio/low_mean": 0.0013610879541374743, "clip_ratio/low_min": 0.0002206603548984276, "clip_ratio/region_mean": 0.0024406374141108245, "epoch": 0.12142636310075891, "grad_norm": 0.6557591557502747, "learning_rate": 2e-07, "loss": 0.0426, "step": 1301 }, { "clip_ratio/high_max": 0.0024565923813497648, "clip_ratio/high_mean": 0.0009701868584670592, "clip_ratio/low_mean": 0.0015762503317091614, "clip_ratio/low_min": 0.00013898547877033707, "clip_ratio/region_mean": 0.0025464371938141994, "epoch": 0.1215196962007595, "grad_norm": 0.48896560072898865, "learning_rate": 2e-07, "loss": 0.0727, "step": 1302 }, { "clip_ratio/high_max": 0.0026297317817807198, "clip_ratio/high_mean": 0.0011425617121858522, "clip_ratio/low_mean": 0.001752933872921858, "clip_ratio/low_min": 0.00023350463379756548, "clip_ratio/region_mean": 0.002895495606935583, "epoch": 0.12161302930076008, "grad_norm": 1.0576187372207642, "learning_rate": 2e-07, "loss": 0.0877, "step": 1303 }, { "clip_ratio/high_max": 0.0025416556673008017, "clip_ratio/high_mean": 0.0011352275032550097, "clip_ratio/low_mean": 0.0015676365728722885, "clip_ratio/low_min": 9.258618592866696e-05, "clip_ratio/region_mean": 0.0027028641270590015, "epoch": 0.12170636240076066, "grad_norm": 89.3762435913086, "learning_rate": 2e-07, "loss": 0.0277, "step": 1304 }, { "clip_ratio/high_max": 0.002416131268546451, "clip_ratio/high_mean": 0.0009025474937516265, "clip_ratio/low_mean": 0.0017851245429483242, "clip_ratio/low_min": 0.00012899404919153312, "clip_ratio/region_mean": 0.002687672051251866, "epoch": 0.12179969550076125, "grad_norm": 7.530352592468262, "learning_rate": 2e-07, "loss": 0.0824, "step": 1305 }, { "clip_ratio/high_max": 0.0029166760432417504, "clip_ratio/high_mean": 0.0010477814794285223, "clip_ratio/low_mean": 0.001903841271996498, "clip_ratio/low_min": 0.00017777964058041107, "clip_ratio/region_mean": 0.0029516228023567237, "epoch": 0.12189302860076183, "grad_norm": 0.7220433950424194, "learning_rate": 2e-07, "loss": 0.1223, "step": 1306 }, { "clip_ratio/high_max": 0.0028752483340213075, "clip_ratio/high_mean": 0.0010810449966811575, "clip_ratio/low_mean": 0.0020802442813874222, "clip_ratio/low_min": 0.00028315925010247156, "clip_ratio/region_mean": 0.00316128930717241, "epoch": 0.12198636170076241, "grad_norm": 4.103100299835205, "learning_rate": 2e-07, "loss": 0.0671, "step": 1307 }, { "clip_ratio/high_max": 0.0027585926873143762, "clip_ratio/high_mean": 0.0012272206404304598, "clip_ratio/low_mean": 0.002018750623392407, "clip_ratio/low_min": 0.0002049338654614985, "clip_ratio/region_mean": 0.0032459712092531845, "epoch": 0.122079694800763, "grad_norm": 2.666363477706909, "learning_rate": 2e-07, "loss": 0.0322, "step": 1308 }, { "clip_ratio/high_max": 0.0029222180201031733, "clip_ratio/high_mean": 0.0012271265277377097, "clip_ratio/low_mean": 0.0019240626352257095, "clip_ratio/low_min": 0.0003377426000952255, "clip_ratio/region_mean": 0.0031511891720583662, "epoch": 0.12217302790076358, "grad_norm": 5.617126941680908, "learning_rate": 2e-07, "loss": 0.0843, "step": 1309 }, { "clip_ratio/high_max": 0.0028127822588430718, "clip_ratio/high_mean": 0.0012317429609538522, "clip_ratio/low_mean": 0.002120734927302692, "clip_ratio/low_min": 0.00025279479268647265, "clip_ratio/region_mean": 0.0033524778846185654, "epoch": 0.12226636100076417, "grad_norm": 1.0410999059677124, "learning_rate": 2e-07, "loss": 0.1051, "step": 1310 }, { "clip_ratio/high_max": 0.0027450981942820363, "clip_ratio/high_mean": 0.0011484237693366595, "clip_ratio/low_mean": 0.0021880975473322906, "clip_ratio/low_min": 5.8360145885671955e-05, "clip_ratio/region_mean": 0.0033365212875651196, "epoch": 0.12235969410076475, "grad_norm": 6.011181354522705, "learning_rate": 2e-07, "loss": 0.0833, "step": 1311 }, { "clip_ratio/high_max": 0.00256658283615252, "clip_ratio/high_mean": 0.0012264629858691478, "clip_ratio/low_mean": 0.0025807344500208274, "clip_ratio/low_min": 0.0005743810470448807, "clip_ratio/region_mean": 0.0038071974413469434, "epoch": 0.12245302720076533, "grad_norm": 20.54705810546875, "learning_rate": 2e-07, "loss": 0.0776, "step": 1312 }, { "clip_ratio/high_max": 0.003388223449292127, "clip_ratio/high_mean": 0.001424372363544535, "clip_ratio/low_mean": 0.002528334269300103, "clip_ratio/low_min": 0.00027897344443772454, "clip_ratio/region_mean": 0.00395270659646485, "epoch": 0.12254636030076592, "grad_norm": 1.5548619031906128, "learning_rate": 2e-07, "loss": 0.0635, "step": 1313 }, { "clip_ratio/high_max": 0.003321803524158895, "clip_ratio/high_mean": 0.0013250306619738694, "clip_ratio/low_mean": 0.0023411684596794657, "clip_ratio/low_min": 0.000234968361837673, "clip_ratio/region_mean": 0.0036661991180153564, "epoch": 0.1226396934007665, "grad_norm": 2.387840986251831, "learning_rate": 2e-07, "loss": 0.0263, "step": 1314 }, { "clip_ratio/high_max": 0.003023161021701526, "clip_ratio/high_mean": 0.001289168998482637, "clip_ratio/low_mean": 0.0028119789676566143, "clip_ratio/low_min": 0.0003713697633429547, "clip_ratio/region_mean": 0.004101147947949357, "epoch": 0.12273302650076708, "grad_norm": 1.8666458129882812, "learning_rate": 2e-07, "loss": 0.081, "step": 1315 }, { "clip_ratio/high_max": 0.0033762019229470752, "clip_ratio/high_mean": 0.0015177727364061866, "clip_ratio/low_mean": 0.0030554325712728314, "clip_ratio/low_min": 0.0003842158694169484, "clip_ratio/region_mean": 0.004573205238557421, "epoch": 0.12282635960076767, "grad_norm": 1.4815889596939087, "learning_rate": 2e-07, "loss": 0.0877, "step": 1316 }, { "clip_ratio/high_max": 0.003608994054957293, "clip_ratio/high_mean": 0.0015534269768977538, "clip_ratio/low_mean": 0.002712101675570011, "clip_ratio/low_min": 0.0003946566430386156, "clip_ratio/region_mean": 0.00426552866701968, "epoch": 0.12291969270076825, "grad_norm": 0.7337727546691895, "learning_rate": 2e-07, "loss": 0.0169, "step": 1317 }, { "clip_ratio/high_max": 0.003594927504309453, "clip_ratio/high_mean": 0.001376797696138965, "clip_ratio/low_mean": 0.0034329461996094324, "clip_ratio/low_min": 0.0008164546125044581, "clip_ratio/region_mean": 0.004809743884834461, "epoch": 0.12301302580076884, "grad_norm": 1.6185722351074219, "learning_rate": 2e-07, "loss": 0.0626, "step": 1318 }, { "clip_ratio/high_max": 0.0028419867885531858, "clip_ratio/high_mean": 0.0012378473438729998, "clip_ratio/low_mean": 0.003184839035384357, "clip_ratio/low_min": 0.00015968134539434686, "clip_ratio/region_mean": 0.004422686353791505, "epoch": 0.12310635890076942, "grad_norm": 1.352743148803711, "learning_rate": 2e-07, "loss": 0.1029, "step": 1319 }, { "clip_ratio/high_max": 0.0032617323959129862, "clip_ratio/high_mean": 0.001372216149320593, "clip_ratio/low_mean": 0.0037166127876844257, "clip_ratio/low_min": 0.00030627874366473407, "clip_ratio/region_mean": 0.005088829013402574, "epoch": 0.12319969200077, "grad_norm": 1.348265528678894, "learning_rate": 2e-07, "loss": 0.0671, "step": 1320 }, { "clip_ratio/high_max": 0.0035115740392939188, "clip_ratio/high_mean": 0.0015300203376682475, "clip_ratio/low_mean": 0.00337849126663059, "clip_ratio/low_min": 0.0002684738992684288, "clip_ratio/region_mean": 0.004908511662506498, "epoch": 0.12329302510077059, "grad_norm": 4.012618064880371, "learning_rate": 2e-07, "loss": 0.0379, "step": 1321 }, { "clip_ratio/high_max": 0.0036439554241951555, "clip_ratio/high_mean": 0.0014297419402282685, "clip_ratio/low_mean": 0.003916191621101461, "clip_ratio/low_min": 0.0008333190125995316, "clip_ratio/region_mean": 0.0053459336049854755, "epoch": 0.12338635820077117, "grad_norm": 2.0804901123046875, "learning_rate": 2e-07, "loss": 0.0631, "step": 1322 }, { "clip_ratio/high_max": 0.003811981041508261, "clip_ratio/high_mean": 0.0017699788841127884, "clip_ratio/low_mean": 0.0032547361333854496, "clip_ratio/low_min": 0.00044341111424728297, "clip_ratio/region_mean": 0.005024715006584302, "epoch": 0.12347969130077174, "grad_norm": 1.471629023551941, "learning_rate": 2e-07, "loss": 0.0405, "step": 1323 }, { "clip_ratio/high_max": 0.004324477442423813, "clip_ratio/high_mean": 0.0018312577085453086, "clip_ratio/low_mean": 0.0037654339175787754, "clip_ratio/low_min": 0.0005252777427813271, "clip_ratio/region_mean": 0.005596691713435575, "epoch": 0.12357302440077234, "grad_norm": 58.86204147338867, "learning_rate": 2e-07, "loss": 0.006, "step": 1324 }, { "clip_ratio/high_max": 0.0036713376102852635, "clip_ratio/high_mean": 0.001600882660568459, "clip_ratio/low_mean": 0.003650217411632184, "clip_ratio/low_min": 0.0006687878776574507, "clip_ratio/region_mean": 0.005251100083114579, "epoch": 0.12366635750077291, "grad_norm": 2.301476240158081, "learning_rate": 2e-07, "loss": 0.0677, "step": 1325 }, { "clip_ratio/high_max": 0.004266634598025121, "clip_ratio/high_mean": 0.0016173775293282233, "clip_ratio/low_mean": 0.004164997080806643, "clip_ratio/low_min": 0.0004247131528245518, "clip_ratio/region_mean": 0.005782374617410824, "epoch": 0.12375969060077349, "grad_norm": 2.031250476837158, "learning_rate": 2e-07, "loss": 0.0861, "step": 1326 }, { "clip_ratio/high_max": 0.003821195219643414, "clip_ratio/high_mean": 0.0018147397349821404, "clip_ratio/low_mean": 0.004478591159568168, "clip_ratio/low_min": 0.000371083309801179, "clip_ratio/region_mean": 0.006293331156484783, "epoch": 0.12385302370077408, "grad_norm": 20.447481155395508, "learning_rate": 2e-07, "loss": 0.0841, "step": 1327 }, { "clip_ratio/high_max": 0.004403159429784864, "clip_ratio/high_mean": 0.0018187564382969867, "clip_ratio/low_mean": 0.004737912488053553, "clip_ratio/low_min": 0.0010361957074564998, "clip_ratio/region_mean": 0.0065566688135731965, "epoch": 0.12394635680077466, "grad_norm": 82.6592025756836, "learning_rate": 2e-07, "loss": 0.1064, "step": 1328 }, { "clip_ratio/high_max": 0.004200952156679705, "clip_ratio/high_mean": 0.0018556630348030012, "clip_ratio/low_mean": 0.004676060838392004, "clip_ratio/low_min": 0.0008026572286325973, "clip_ratio/region_mean": 0.006531723833177239, "epoch": 0.12403968990077525, "grad_norm": 1.5106040239334106, "learning_rate": 2e-07, "loss": 0.1013, "step": 1329 }, { "clip_ratio/high_max": 0.004967228669556789, "clip_ratio/high_mean": 0.0018556908617028967, "clip_ratio/low_mean": 0.004661493469029665, "clip_ratio/low_min": 0.0004244707524776459, "clip_ratio/region_mean": 0.006517184403492138, "epoch": 0.12413302300077583, "grad_norm": 578.4109497070312, "learning_rate": 2e-07, "loss": 0.129, "step": 1330 }, { "clip_ratio/high_max": 0.00424294667027425, "clip_ratio/high_mean": 0.001990495409700088, "clip_ratio/low_mean": 0.004988981003407389, "clip_ratio/low_min": 0.00045367484926828183, "clip_ratio/region_mean": 0.006979476340347901, "epoch": 0.12422635610077641, "grad_norm": 2.2251698970794678, "learning_rate": 2e-07, "loss": 0.0413, "step": 1331 }, { "clip_ratio/high_max": 0.004684070241637528, "clip_ratio/high_mean": 0.0019796414162556175, "clip_ratio/low_mean": 0.004559386943583377, "clip_ratio/low_min": 0.00028924037906108424, "clip_ratio/region_mean": 0.006539028283441439, "epoch": 0.124319689200777, "grad_norm": 6.738215446472168, "learning_rate": 2e-07, "loss": 0.0612, "step": 1332 }, { "clip_ratio/high_max": 0.005025810140068643, "clip_ratio/high_mean": 0.002267664203827735, "clip_ratio/low_mean": 0.00546437400043942, "clip_ratio/low_min": 0.001307138210904668, "clip_ratio/region_mean": 0.0077320382406469434, "epoch": 0.12441302230077758, "grad_norm": 3.0532329082489014, "learning_rate": 2e-07, "loss": 0.0579, "step": 1333 }, { "clip_ratio/high_max": 0.005922570111579262, "clip_ratio/high_mean": 0.0022617512004217133, "clip_ratio/low_mean": 0.005171626646188088, "clip_ratio/low_min": 0.0007059713097987697, "clip_ratio/region_mean": 0.00743337775929831, "epoch": 0.12450635540077816, "grad_norm": 50.08420181274414, "learning_rate": 2e-07, "loss": 0.0603, "step": 1334 }, { "clip_ratio/high_max": 0.005804768254165538, "clip_ratio/high_mean": 0.0024631434789625928, "clip_ratio/low_mean": 0.005061570424004458, "clip_ratio/low_min": 0.0004477602196857333, "clip_ratio/region_mean": 0.007524714048486203, "epoch": 0.12459968850077875, "grad_norm": 2.4682512283325195, "learning_rate": 2e-07, "loss": 0.0451, "step": 1335 }, { "clip_ratio/high_max": 0.004563208742183633, "clip_ratio/high_mean": 0.0021618024111376144, "clip_ratio/low_mean": 0.005883397767320275, "clip_ratio/low_min": 0.0007267982509802096, "clip_ratio/region_mean": 0.008045199938351288, "epoch": 0.12469302160077933, "grad_norm": 853.88720703125, "learning_rate": 2e-07, "loss": 0.1085, "step": 1336 }, { "clip_ratio/high_max": 0.005174192032427527, "clip_ratio/high_mean": 0.002181114425184205, "clip_ratio/low_mean": 0.006028450807207264, "clip_ratio/low_min": 0.0008671490359120071, "clip_ratio/region_mean": 0.008209565101424232, "epoch": 0.12478635470077991, "grad_norm": 1169.3795166015625, "learning_rate": 2e-07, "loss": 0.1763, "step": 1337 }, { "clip_ratio/high_max": 0.004815749649424106, "clip_ratio/high_mean": 0.002192610812926432, "clip_ratio/low_mean": 0.005014346505049616, "clip_ratio/low_min": 0.0006766083897673525, "clip_ratio/region_mean": 0.007206957176094875, "epoch": 0.1248796878007805, "grad_norm": 48.125980377197266, "learning_rate": 2e-07, "loss": 0.1083, "step": 1338 }, { "clip_ratio/high_max": 0.007460586479282938, "clip_ratio/high_mean": 0.0030897234319127165, "clip_ratio/low_mean": 0.005521428713109344, "clip_ratio/low_min": 0.0007562608880107291, "clip_ratio/region_mean": 0.008611152123194188, "epoch": 0.12497302090078108, "grad_norm": 3.5818192958831787, "learning_rate": 2e-07, "loss": 0.0763, "step": 1339 }, { "clip_ratio/high_max": 0.006609363699681126, "clip_ratio/high_mean": 0.0029561710034613498, "clip_ratio/low_mean": 0.005504505868884735, "clip_ratio/low_min": 0.0010176690375374164, "clip_ratio/region_mean": 0.008460676937829703, "epoch": 0.12506635400078167, "grad_norm": 3.113283634185791, "learning_rate": 2e-07, "loss": 0.0568, "step": 1340 }, { "clip_ratio/high_max": 0.005080006376374513, "clip_ratio/high_mean": 0.0024947998026618734, "clip_ratio/low_mean": 0.00578794079774525, "clip_ratio/low_min": 0.0005818089193780906, "clip_ratio/region_mean": 0.008282740833237767, "epoch": 0.12515968710078224, "grad_norm": 15.460991859436035, "learning_rate": 2e-07, "loss": 0.0703, "step": 1341 }, { "clip_ratio/high_max": 0.00700264677288942, "clip_ratio/high_mean": 0.002902157575590536, "clip_ratio/low_mean": 0.0056607086735311896, "clip_ratio/low_min": 0.000527001826412743, "clip_ratio/region_mean": 0.008562866161810234, "epoch": 0.12525302020078283, "grad_norm": 4.862478733062744, "learning_rate": 2e-07, "loss": 0.0402, "step": 1342 }, { "clip_ratio/high_max": 0.006221263567567803, "clip_ratio/high_mean": 0.002878733357647434, "clip_ratio/low_mean": 0.006110047164838761, "clip_ratio/low_min": 0.0015404175501316786, "clip_ratio/region_mean": 0.008988780406070873, "epoch": 0.12534635330078342, "grad_norm": 4.505019664764404, "learning_rate": 2e-07, "loss": 0.0632, "step": 1343 }, { "clip_ratio/high_max": 0.00627762998919934, "clip_ratio/high_mean": 0.0030187028241925873, "clip_ratio/low_mean": 0.00635311589576304, "clip_ratio/low_min": 0.0007405232390738092, "clip_ratio/region_mean": 0.009371819003717974, "epoch": 0.125439686400784, "grad_norm": 9.610197067260742, "learning_rate": 2e-07, "loss": 0.0725, "step": 1344 }, { "clip_ratio/high_max": 0.005965406686300412, "clip_ratio/high_mean": 0.0025150028814096004, "clip_ratio/low_mean": 0.006935847079148516, "clip_ratio/low_min": 0.0013464745279634371, "clip_ratio/region_mean": 0.0094508501351811, "epoch": 0.12553301950078458, "grad_norm": 39.79737854003906, "learning_rate": 2e-07, "loss": 0.0697, "step": 1345 }, { "clip_ratio/high_max": 0.00681155442725867, "clip_ratio/high_mean": 0.003077587200095877, "clip_ratio/low_mean": 0.00730896185268648, "clip_ratio/low_min": 0.001295210124226287, "clip_ratio/region_mean": 0.010386548849055544, "epoch": 0.12562635260078517, "grad_norm": 52.230079650878906, "learning_rate": 2e-07, "loss": 0.0698, "step": 1346 }, { "clip_ratio/high_max": 0.007904044090537354, "clip_ratio/high_mean": 0.0034034777054330334, "clip_ratio/low_mean": 0.006876679166452959, "clip_ratio/low_min": 0.0006258564171730541, "clip_ratio/region_mean": 0.010280157119268551, "epoch": 0.12571968570078576, "grad_norm": 4.530972003936768, "learning_rate": 2e-07, "loss": 0.0627, "step": 1347 }, { "clip_ratio/high_max": 0.007020214485237375, "clip_ratio/high_mean": 0.003018514769792091, "clip_ratio/low_mean": 0.006489370425697416, "clip_ratio/low_min": 0.00043017041025450453, "clip_ratio/region_mean": 0.009507884999038652, "epoch": 0.12581301880078632, "grad_norm": 4.551019668579102, "learning_rate": 2e-07, "loss": 0.0581, "step": 1348 }, { "clip_ratio/high_max": 0.007822214582120068, "clip_ratio/high_mean": 0.0033007864403771237, "clip_ratio/low_mean": 0.006442233090638183, "clip_ratio/low_min": 0.00027159501769347116, "clip_ratio/region_mean": 0.009743019443703815, "epoch": 0.12590635190078692, "grad_norm": 565.720947265625, "learning_rate": 2e-07, "loss": 0.0565, "step": 1349 }, { "clip_ratio/high_max": 0.007591606481582858, "clip_ratio/high_mean": 0.0033883271244121715, "clip_ratio/low_mean": 0.006876061583170667, "clip_ratio/low_min": 0.0005198039798415266, "clip_ratio/region_mean": 0.010264388780342415, "epoch": 0.1259996850007875, "grad_norm": 12.586240768432617, "learning_rate": 2e-07, "loss": 0.0306, "step": 1350 }, { "clip_ratio/high_max": 0.008313079713843763, "clip_ratio/high_mean": 0.003342870477354154, "clip_ratio/low_mean": 0.006762599179637618, "clip_ratio/low_min": 0.0004602214612532407, "clip_ratio/region_mean": 0.010105469758855179, "epoch": 0.12609301810078807, "grad_norm": 12.566760063171387, "learning_rate": 2e-07, "loss": 0.0581, "step": 1351 }, { "clip_ratio/high_max": 0.00777222377655562, "clip_ratio/high_mean": 0.003248009437811561, "clip_ratio/low_mean": 0.007745289301965386, "clip_ratio/low_min": 0.0016451773335575126, "clip_ratio/region_mean": 0.010993298725225031, "epoch": 0.12618635120078867, "grad_norm": 2.991405963897705, "learning_rate": 2e-07, "loss": 0.0585, "step": 1352 }, { "clip_ratio/high_max": 0.006642212421866134, "clip_ratio/high_mean": 0.0031359772256109864, "clip_ratio/low_mean": 0.0071029325044946745, "clip_ratio/low_min": 0.00031318347464548424, "clip_ratio/region_mean": 0.010238909686449915, "epoch": 0.12627968430078926, "grad_norm": 15.164664268493652, "learning_rate": 2e-07, "loss": 0.0365, "step": 1353 }, { "clip_ratio/high_max": 0.007622488745255396, "clip_ratio/high_mean": 0.0037569389678537846, "clip_ratio/low_mean": 0.007462243331247009, "clip_ratio/low_min": 0.001169490434222098, "clip_ratio/region_mean": 0.011219182168133557, "epoch": 0.12637301740078982, "grad_norm": 12.094339370727539, "learning_rate": 2e-07, "loss": 0.0684, "step": 1354 }, { "clip_ratio/high_max": 0.007589510292746127, "clip_ratio/high_mean": 0.003657142646261491, "clip_ratio/low_mean": 0.0077282383281271905, "clip_ratio/low_min": 0.0007491261203540489, "clip_ratio/region_mean": 0.011385381105355918, "epoch": 0.12646635050079041, "grad_norm": 80.28907775878906, "learning_rate": 2e-07, "loss": -0.0168, "step": 1355 }, { "clip_ratio/high_max": 0.008491607033647597, "clip_ratio/high_mean": 0.0038237879998632707, "clip_ratio/low_mean": 0.007797091122483835, "clip_ratio/low_min": 0.0015984141209628433, "clip_ratio/region_mean": 0.011620879406109452, "epoch": 0.126559683600791, "grad_norm": 65382.0625, "learning_rate": 2e-07, "loss": 2.3851, "step": 1356 }, { "clip_ratio/high_max": 0.008770831555011682, "clip_ratio/high_mean": 0.003798535355599597, "clip_ratio/low_mean": 0.007718040535110049, "clip_ratio/low_min": 0.0009147957498498727, "clip_ratio/region_mean": 0.011516576167196035, "epoch": 0.12665301670079157, "grad_norm": 17.83380126953125, "learning_rate": 2e-07, "loss": 0.0511, "step": 1357 }, { "clip_ratio/high_max": 0.008397737692575902, "clip_ratio/high_mean": 0.0038584269350394607, "clip_ratio/low_mean": 0.009711183636682108, "clip_ratio/low_min": 0.0014238382354960777, "clip_ratio/region_mean": 0.013569610484410077, "epoch": 0.12674634980079216, "grad_norm": 133.03173828125, "learning_rate": 2e-07, "loss": 0.1153, "step": 1358 }, { "clip_ratio/high_max": 0.009064075697096996, "clip_ratio/high_mean": 0.004104536637896672, "clip_ratio/low_mean": 0.008024576440220699, "clip_ratio/low_min": 0.0008421769452979788, "clip_ratio/region_mean": 0.012129112961702049, "epoch": 0.12683968290079276, "grad_norm": 13.689435005187988, "learning_rate": 2e-07, "loss": 0.0367, "step": 1359 }, { "clip_ratio/high_max": 0.008122974948491901, "clip_ratio/high_mean": 0.003686619849759154, "clip_ratio/low_mean": 0.007701306574745104, "clip_ratio/low_min": 0.001500025529821869, "clip_ratio/region_mean": 0.011387926497263834, "epoch": 0.12693301600079332, "grad_norm": 7.920009136199951, "learning_rate": 2e-07, "loss": 0.0752, "step": 1360 }, { "clip_ratio/high_max": 0.008824864358757623, "clip_ratio/high_mean": 0.004094178482773714, "clip_ratio/low_mean": 0.00784577606827952, "clip_ratio/low_min": 0.0011034290655516088, "clip_ratio/region_mean": 0.011939954536501318, "epoch": 0.1270263491007939, "grad_norm": 5.753616809844971, "learning_rate": 2e-07, "loss": 0.0769, "step": 1361 }, { "clip_ratio/high_max": 0.008815786874038167, "clip_ratio/high_mean": 0.0038370960755855776, "clip_ratio/low_mean": 0.00890799638000317, "clip_ratio/low_min": 0.0010157091783185024, "clip_ratio/region_mean": 0.012745092331897467, "epoch": 0.1271196822007945, "grad_norm": 4.31011438369751, "learning_rate": 2e-07, "loss": 0.0544, "step": 1362 }, { "clip_ratio/high_max": 0.009173411905067042, "clip_ratio/high_mean": 0.004206277561024763, "clip_ratio/low_mean": 0.008902590576326475, "clip_ratio/low_min": 0.0012337966018094448, "clip_ratio/region_mean": 0.013108868384733796, "epoch": 0.12721301530079507, "grad_norm": 113.02539825439453, "learning_rate": 2e-07, "loss": 0.0532, "step": 1363 }, { "clip_ratio/high_max": 0.009732156933750957, "clip_ratio/high_mean": 0.004296673243516125, "clip_ratio/low_mean": 0.009950926672900096, "clip_ratio/low_min": 0.0017321819614153355, "clip_ratio/region_mean": 0.014247599698137492, "epoch": 0.12730634840079566, "grad_norm": 25.278486251831055, "learning_rate": 2e-07, "loss": 0.0657, "step": 1364 }, { "clip_ratio/high_max": 0.009044782869750634, "clip_ratio/high_mean": 0.004483708820771426, "clip_ratio/low_mean": 0.009401712595717981, "clip_ratio/low_min": 0.0006635219742747722, "clip_ratio/region_mean": 0.013885421503800899, "epoch": 0.12739968150079625, "grad_norm": 4.522478103637695, "learning_rate": 2e-07, "loss": 0.0254, "step": 1365 }, { "clip_ratio/high_max": 0.009749944772920571, "clip_ratio/high_mean": 0.004411700530909002, "clip_ratio/low_mean": 0.009453783743083477, "clip_ratio/low_min": 0.0006407620458048768, "clip_ratio/region_mean": 0.013865483924746513, "epoch": 0.12749301460079684, "grad_norm": 170.24468994140625, "learning_rate": 2e-07, "loss": 0.0452, "step": 1366 }, { "clip_ratio/high_max": 0.008658615901367739, "clip_ratio/high_mean": 0.004075836477568373, "clip_ratio/low_mean": 0.010906891111517325, "clip_ratio/low_min": 0.0020341548915894236, "clip_ratio/region_mean": 0.014982727472670376, "epoch": 0.1275863477007974, "grad_norm": 7.427040100097656, "learning_rate": 2e-07, "loss": 0.1289, "step": 1367 }, { "clip_ratio/high_max": 0.009229083661921322, "clip_ratio/high_mean": 0.0038376564771169797, "clip_ratio/low_mean": 0.009799160587135702, "clip_ratio/low_min": 0.0010566769051365554, "clip_ratio/region_mean": 0.013636817049700767, "epoch": 0.127679680800798, "grad_norm": 15.811633110046387, "learning_rate": 2e-07, "loss": 0.0882, "step": 1368 }, { "clip_ratio/high_max": 0.009676406334619969, "clip_ratio/high_mean": 0.004735783310024999, "clip_ratio/low_mean": 0.01049866178072989, "clip_ratio/low_min": 0.0016753449526731856, "clip_ratio/region_mean": 0.0152344448142685, "epoch": 0.1277730139007986, "grad_norm": 5.762593746185303, "learning_rate": 2e-07, "loss": 0.0405, "step": 1369 }, { "clip_ratio/high_max": 0.009980883914977312, "clip_ratio/high_mean": 0.004616628692019731, "clip_ratio/low_mean": 0.008923128742026165, "clip_ratio/low_min": 0.00045701463386649266, "clip_ratio/region_mean": 0.013539757521357387, "epoch": 0.12786634700079916, "grad_norm": 4.980777740478516, "learning_rate": 2e-07, "loss": 0.0358, "step": 1370 }, { "clip_ratio/high_max": 0.010181256919167936, "clip_ratio/high_mean": 0.0046004070827621035, "clip_ratio/low_mean": 0.01055732331587933, "clip_ratio/low_min": 0.0017352494978695177, "clip_ratio/region_mean": 0.015157730435021222, "epoch": 0.12795968010079975, "grad_norm": 14.894510269165039, "learning_rate": 2e-07, "loss": 0.0397, "step": 1371 }, { "clip_ratio/high_max": 0.011247337533859536, "clip_ratio/high_mean": 0.005018330542952754, "clip_ratio/low_mean": 0.010208075880655088, "clip_ratio/low_min": 0.0013873384014004841, "clip_ratio/region_mean": 0.015226406103465706, "epoch": 0.12805301320080034, "grad_norm": 1238.2757568359375, "learning_rate": 2e-07, "loss": 0.1193, "step": 1372 }, { "clip_ratio/high_max": 0.010289447964169085, "clip_ratio/high_mean": 0.0050898346526082605, "clip_ratio/low_mean": 0.009937901690136641, "clip_ratio/low_min": 0.0003622362783062272, "clip_ratio/region_mean": 0.015027735847979784, "epoch": 0.1281463463008009, "grad_norm": 10.726532936096191, "learning_rate": 2e-07, "loss": 0.0353, "step": 1373 }, { "clip_ratio/high_max": 0.011398923001252115, "clip_ratio/high_mean": 0.004539710396784358, "clip_ratio/low_mean": 0.012131829949794337, "clip_ratio/low_min": 0.0021259820350678638, "clip_ratio/region_mean": 0.0166715404484421, "epoch": 0.1282396794008015, "grad_norm": 2891.561279296875, "learning_rate": 2e-07, "loss": 0.1399, "step": 1374 }, { "clip_ratio/high_max": 0.010754793969681486, "clip_ratio/high_mean": 0.00516165177396033, "clip_ratio/low_mean": 0.010866523312870413, "clip_ratio/low_min": 0.0013664298312505707, "clip_ratio/region_mean": 0.01602817513048649, "epoch": 0.1283330125008021, "grad_norm": 430.67791748046875, "learning_rate": 2e-07, "loss": 0.0588, "step": 1375 }, { "clip_ratio/high_max": 0.010772893438115716, "clip_ratio/high_mean": 0.00479254581296118, "clip_ratio/low_mean": 0.011534293764270842, "clip_ratio/low_min": 0.001417097395460587, "clip_ratio/region_mean": 0.016326839569956064, "epoch": 0.12842634560080265, "grad_norm": 3549.003662109375, "learning_rate": 2e-07, "loss": 0.2066, "step": 1376 }, { "clip_ratio/high_max": 0.012813848414225504, "clip_ratio/high_mean": 0.006158334595966153, "clip_ratio/low_mean": 0.012775158626027405, "clip_ratio/low_min": 0.0019102286896668375, "clip_ratio/region_mean": 0.018933493236545473, "epoch": 0.12851967870080325, "grad_norm": 35.26154708862305, "learning_rate": 2e-07, "loss": 0.0932, "step": 1377 }, { "clip_ratio/high_max": 0.013802618195768446, "clip_ratio/high_mean": 0.006466358056059107, "clip_ratio/low_mean": 0.012821750598959625, "clip_ratio/low_min": 0.0012362277047941461, "clip_ratio/region_mean": 0.019288108684122562, "epoch": 0.12861301180080384, "grad_norm": 12.446272850036621, "learning_rate": 2e-07, "loss": 0.0731, "step": 1378 }, { "clip_ratio/high_max": 0.013600453356048092, "clip_ratio/high_mean": 0.005589053893345408, "clip_ratio/low_mean": 0.011182608315721154, "clip_ratio/low_min": 0.001316051246249117, "clip_ratio/region_mean": 0.01677166239824146, "epoch": 0.1287063449008044, "grad_norm": 56.86370849609375, "learning_rate": 2e-07, "loss": 0.0845, "step": 1379 }, { "clip_ratio/high_max": 0.014078281965339556, "clip_ratio/high_mean": 0.005915598812862299, "clip_ratio/low_mean": 0.012466812884667888, "clip_ratio/low_min": 0.001591451422427781, "clip_ratio/region_mean": 0.018382411799393594, "epoch": 0.128799678000805, "grad_norm": 169.367919921875, "learning_rate": 2e-07, "loss": 0.0855, "step": 1380 }, { "clip_ratio/high_max": 0.01187056937487796, "clip_ratio/high_mean": 0.005514364718692377, "clip_ratio/low_mean": 0.011443059687735513, "clip_ratio/low_min": 0.0017501942056696862, "clip_ratio/region_mean": 0.01695742440642789, "epoch": 0.1288930111008056, "grad_norm": 37.27037811279297, "learning_rate": 2e-07, "loss": 0.1033, "step": 1381 }, { "clip_ratio/high_max": 0.013064442726317793, "clip_ratio/high_mean": 0.006072487230994739, "clip_ratio/low_mean": 0.012485475308494642, "clip_ratio/low_min": 0.0018954028128064238, "clip_ratio/region_mean": 0.018557962670456618, "epoch": 0.12898634420080615, "grad_norm": 9.110992431640625, "learning_rate": 2e-07, "loss": 0.0558, "step": 1382 }, { "clip_ratio/high_max": 0.014177469623973593, "clip_ratio/high_mean": 0.005960215989034623, "clip_ratio/low_mean": 0.01333567698020488, "clip_ratio/low_min": 0.0018166062473028433, "clip_ratio/region_mean": 0.019295892619993538, "epoch": 0.12907967730080674, "grad_norm": 637.1403198242188, "learning_rate": 2e-07, "loss": 0.0821, "step": 1383 }, { "clip_ratio/high_max": 0.013291084149386734, "clip_ratio/high_mean": 0.006330167132546194, "clip_ratio/low_mean": 0.014193860988598317, "clip_ratio/low_min": 0.0024563579936511815, "clip_ratio/region_mean": 0.02052402769913897, "epoch": 0.12917301040080734, "grad_norm": 42.615577697753906, "learning_rate": 2e-07, "loss": 0.1069, "step": 1384 }, { "clip_ratio/high_max": 0.013018363300943747, "clip_ratio/high_mean": 0.005892847708310001, "clip_ratio/low_mean": 0.011460623238235712, "clip_ratio/low_min": 0.0006662479136139154, "clip_ratio/region_mean": 0.01735347107751295, "epoch": 0.1292663435008079, "grad_norm": 43.0958137512207, "learning_rate": 2e-07, "loss": 0.0607, "step": 1385 }, { "clip_ratio/high_max": 0.013428730890154839, "clip_ratio/high_mean": 0.006094695927458815, "clip_ratio/low_mean": 0.012777787167578936, "clip_ratio/low_min": 0.0019133788446197286, "clip_ratio/region_mean": 0.018872483575250953, "epoch": 0.1293596766008085, "grad_norm": 243.92605590820312, "learning_rate": 2e-07, "loss": 0.0711, "step": 1386 }, { "clip_ratio/high_max": 0.014621897949837148, "clip_ratio/high_mean": 0.006921335094375536, "clip_ratio/low_mean": 0.012069863820215687, "clip_ratio/low_min": 0.0010350085940444842, "clip_ratio/region_mean": 0.018991198972798884, "epoch": 0.12945300970080909, "grad_norm": 742.0355224609375, "learning_rate": 2e-07, "loss": 0.0868, "step": 1387 }, { "clip_ratio/high_max": 0.01423992743366398, "clip_ratio/high_mean": 0.006241173599846661, "clip_ratio/low_mean": 0.015176182991126552, "clip_ratio/low_min": 0.0018747244321275502, "clip_ratio/region_mean": 0.021417356561869383, "epoch": 0.12954634280080968, "grad_norm": 34.0280647277832, "learning_rate": 2e-07, "loss": 0.1654, "step": 1388 }, { "clip_ratio/high_max": 0.014356733561726287, "clip_ratio/high_mean": 0.006569287637830712, "clip_ratio/low_mean": 0.012792830064427108, "clip_ratio/low_min": 0.002636040124343708, "clip_ratio/region_mean": 0.019362117571290582, "epoch": 0.12963967590081024, "grad_norm": 9.022438049316406, "learning_rate": 2e-07, "loss": 0.0351, "step": 1389 }, { "clip_ratio/high_max": 0.01657727302517742, "clip_ratio/high_mean": 0.00692776867072098, "clip_ratio/low_mean": 0.012745752144837752, "clip_ratio/low_min": 0.0019026008085347712, "clip_ratio/region_mean": 0.01967352075735107, "epoch": 0.12973300900081083, "grad_norm": 32.590484619140625, "learning_rate": 2e-07, "loss": 0.0707, "step": 1390 }, { "clip_ratio/high_max": 0.014896863358444534, "clip_ratio/high_mean": 0.006878184423840139, "clip_ratio/low_mean": 0.01464800105895847, "clip_ratio/low_min": 0.0016213481721933931, "clip_ratio/region_mean": 0.021526186144910753, "epoch": 0.12982634210081143, "grad_norm": 10.015687942504883, "learning_rate": 2e-07, "loss": 0.0463, "step": 1391 }, { "clip_ratio/high_max": 0.014706387562910095, "clip_ratio/high_mean": 0.006095006465329789, "clip_ratio/low_mean": 0.016185540473088622, "clip_ratio/low_min": 0.0024724976829020306, "clip_ratio/region_mean": 0.022280546720139682, "epoch": 0.129919675200812, "grad_norm": 69.58390808105469, "learning_rate": 2e-07, "loss": 0.0879, "step": 1392 }, { "clip_ratio/high_max": 0.01370748653425835, "clip_ratio/high_mean": 0.0062088213744573295, "clip_ratio/low_mean": 0.01587432128144428, "clip_ratio/low_min": 0.0025354807585245, "clip_ratio/region_mean": 0.022083142772316933, "epoch": 0.13001300830081258, "grad_norm": 87.21321868896484, "learning_rate": 2e-07, "loss": 0.079, "step": 1393 }, { "clip_ratio/high_max": 0.015203474977170117, "clip_ratio/high_mean": 0.0068955081078456715, "clip_ratio/low_mean": 0.016137466416694224, "clip_ratio/low_min": 0.0020803025981877, "clip_ratio/region_mean": 0.02303297503385693, "epoch": 0.13010634140081317, "grad_norm": 15.869612693786621, "learning_rate": 2e-07, "loss": 0.0779, "step": 1394 }, { "clip_ratio/high_max": 0.01649062510114163, "clip_ratio/high_mean": 0.0072498052031733096, "clip_ratio/low_mean": 0.015009917580755427, "clip_ratio/low_min": 0.0022273879148997366, "clip_ratio/region_mean": 0.022259722754824907, "epoch": 0.13019967450081374, "grad_norm": 24.813819885253906, "learning_rate": 2e-07, "loss": 0.0473, "step": 1395 }, { "clip_ratio/high_max": 0.017269184871111065, "clip_ratio/high_mean": 0.007246662804391235, "clip_ratio/low_mean": 0.015746300108730793, "clip_ratio/low_min": 0.001290422645979561, "clip_ratio/region_mean": 0.022992962913122028, "epoch": 0.13029300760081433, "grad_norm": 9.588167190551758, "learning_rate": 2e-07, "loss": 0.0787, "step": 1396 }, { "clip_ratio/high_max": 0.012633189035113901, "clip_ratio/high_mean": 0.006693540533888154, "clip_ratio/low_mean": 0.016628692508675158, "clip_ratio/low_min": 0.0026046088751172647, "clip_ratio/region_mean": 0.023322232766076922, "epoch": 0.13038634070081492, "grad_norm": 128.11599731445312, "learning_rate": 2e-07, "loss": 0.0712, "step": 1397 }, { "clip_ratio/high_max": 0.01474394096294418, "clip_ratio/high_mean": 0.006971714828978293, "clip_ratio/low_mean": 0.018531307694502175, "clip_ratio/low_min": 0.0033085665199905634, "clip_ratio/region_mean": 0.025503022479824722, "epoch": 0.1304796738008155, "grad_norm": 297.33587646484375, "learning_rate": 2e-07, "loss": 0.0789, "step": 1398 }, { "clip_ratio/high_max": 0.014904955809470266, "clip_ratio/high_mean": 0.006701694583171047, "clip_ratio/low_mean": 0.01561338413739577, "clip_ratio/low_min": 0.0015448903504875489, "clip_ratio/region_mean": 0.022315078764222562, "epoch": 0.13057300690081608, "grad_norm": 113.62066650390625, "learning_rate": 2e-07, "loss": 0.1132, "step": 1399 }, { "clip_ratio/high_max": 0.015899311227258295, "clip_ratio/high_mean": 0.007611666951561347, "clip_ratio/low_mean": 0.014763011655304581, "clip_ratio/low_min": 0.0024383912968914956, "clip_ratio/region_mean": 0.022374678927008063, "epoch": 0.13066634000081667, "grad_norm": 9.21013069152832, "learning_rate": 2e-07, "loss": 0.0487, "step": 1400 }, { "clip_ratio/high_max": 0.014716992038302124, "clip_ratio/high_mean": 0.006733620713930577, "clip_ratio/low_mean": 0.01628228829940781, "clip_ratio/low_min": 0.002814228064380586, "clip_ratio/region_mean": 0.023015908838715404, "epoch": 0.13075967310081724, "grad_norm": 108.51646423339844, "learning_rate": 2e-07, "loss": 0.117, "step": 1401 }, { "clip_ratio/high_max": 0.013829248084221035, "clip_ratio/high_mean": 0.006705766209051944, "clip_ratio/low_mean": 0.016364801966119558, "clip_ratio/low_min": 0.003270134395279456, "clip_ratio/region_mean": 0.02307056833524257, "epoch": 0.13085300620081783, "grad_norm": 184.2284393310547, "learning_rate": 2e-07, "loss": 0.1249, "step": 1402 }, { "clip_ratio/high_max": 0.018688655458390713, "clip_ratio/high_mean": 0.008420353609835729, "clip_ratio/low_mean": 0.015965425118338317, "clip_ratio/low_min": 0.0014091550474404357, "clip_ratio/region_mean": 0.024385778582654893, "epoch": 0.13094633930081842, "grad_norm": 22.842815399169922, "learning_rate": 2e-07, "loss": 0.0614, "step": 1403 }, { "clip_ratio/high_max": 0.018823303340468556, "clip_ratio/high_mean": 0.007889596949098632, "clip_ratio/low_mean": 0.017035974946338683, "clip_ratio/low_min": 0.0019894522556569427, "clip_ratio/region_mean": 0.024925571866333485, "epoch": 0.13103967240081899, "grad_norm": 3422.71630859375, "learning_rate": 2e-07, "loss": 0.1899, "step": 1404 }, { "clip_ratio/high_max": 0.015640896279364824, "clip_ratio/high_mean": 0.00706898893986363, "clip_ratio/low_mean": 0.016036787361372262, "clip_ratio/low_min": 0.001825592371460516, "clip_ratio/region_mean": 0.02310577651951462, "epoch": 0.13113300550081958, "grad_norm": 12.950906753540039, "learning_rate": 2e-07, "loss": 0.0788, "step": 1405 }, { "clip_ratio/high_max": 0.016808160347864032, "clip_ratio/high_mean": 0.00787089581717737, "clip_ratio/low_mean": 0.017841617111116648, "clip_ratio/low_min": 0.0020131173077970743, "clip_ratio/region_mean": 0.025712513248436153, "epoch": 0.13122633860082017, "grad_norm": 22.400840759277344, "learning_rate": 2e-07, "loss": 0.1057, "step": 1406 }, { "clip_ratio/high_max": 0.016137479367898777, "clip_ratio/high_mean": 0.0077819539728807285, "clip_ratio/low_mean": 0.018143379827961326, "clip_ratio/low_min": 0.00130741237080656, "clip_ratio/region_mean": 0.02592533396091312, "epoch": 0.13131967170082076, "grad_norm": 30.81023406982422, "learning_rate": 2e-07, "loss": 0.0573, "step": 1407 }, { "clip_ratio/high_max": 0.020197402860503644, "clip_ratio/high_mean": 0.008821286755846813, "clip_ratio/low_mean": 0.019611548603279516, "clip_ratio/low_min": 0.002650469046784565, "clip_ratio/region_mean": 0.028432834602426738, "epoch": 0.13141300480082133, "grad_norm": 69.15196228027344, "learning_rate": 2e-07, "loss": 0.0855, "step": 1408 }, { "clip_ratio/high_max": 0.0034917405719170347, "clip_ratio/high_mean": 0.0010317024043615675, "clip_ratio/low_mean": 0.002106563093548175, "clip_ratio/low_min": 0.0001595597536834248, "clip_ratio/region_mean": 0.003138265456072986, "completions/clipped_ratio": 0.1274239676339286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 920.465576171875, "completions/mean_terminated_length": 456.736083984375, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.13150633790082192, "grad_norm": 1.2340682744979858, "learning_rate": 2e-07, "loss": 0.1062, "num_tokens": 1058172428.0, "reward": 0.1373814195394516, "reward_std": 0.16018612682819366, "rewards/simpleverify_reward/mean": 0.1373814195394516, "rewards/simpleverify_reward/std": 0.34425103664398193, "step": 1409 }, { "clip_ratio/high_max": 0.002850983593816636, "clip_ratio/high_mean": 0.0009008319830172695, "clip_ratio/low_mean": 0.0019528836965037044, "clip_ratio/low_min": 5.574408714892343e-05, "clip_ratio/region_mean": 0.0028537157631944865, "epoch": 0.1315996710008225, "grad_norm": 0.8146823644638062, "learning_rate": 2e-07, "loss": 0.1214, "step": 1410 }, { "clip_ratio/high_max": 0.003719995584106073, "clip_ratio/high_mean": 0.0011078885218012147, "clip_ratio/low_mean": 0.0020745110923598986, "clip_ratio/low_min": 0.0001590524702805851, "clip_ratio/region_mean": 0.0031823996250750497, "epoch": 0.13169300410082307, "grad_norm": 2.607980251312256, "learning_rate": 2e-07, "loss": 0.0782, "step": 1411 }, { "clip_ratio/high_max": 0.0028500656226242427, "clip_ratio/high_mean": 0.0009317502899648389, "clip_ratio/low_mean": 0.001993024183320813, "clip_ratio/low_min": 0.0002563478356023552, "clip_ratio/region_mean": 0.0029247745405882597, "epoch": 0.13178633720082367, "grad_norm": 29.320541381835938, "learning_rate": 2e-07, "loss": 0.113, "step": 1412 }, { "clip_ratio/high_max": 0.003154070087475702, "clip_ratio/high_mean": 0.000961777392149088, "clip_ratio/low_mean": 0.002076791035506176, "clip_ratio/low_min": 0.00014874911857987172, "clip_ratio/region_mean": 0.003038568473129999, "epoch": 0.13187967030082426, "grad_norm": 0.5086946487426758, "learning_rate": 2e-07, "loss": 0.0821, "step": 1413 }, { "clip_ratio/high_max": 0.002783269137580646, "clip_ratio/high_mean": 0.0008951761756179621, "clip_ratio/low_mean": 0.0019675041585287545, "clip_ratio/low_min": 0.00028286849965297733, "clip_ratio/region_mean": 0.0028626803541556, "epoch": 0.13197300340082482, "grad_norm": 3164.13232421875, "learning_rate": 2e-07, "loss": 0.2456, "step": 1414 }, { "clip_ratio/high_max": 0.004083576728589833, "clip_ratio/high_mean": 0.001235523139257566, "clip_ratio/low_mean": 0.002217590379586909, "clip_ratio/low_min": 0.000334015792759601, "clip_ratio/region_mean": 0.0034531135170254856, "epoch": 0.13206633650082542, "grad_norm": 5.14722204208374, "learning_rate": 2e-07, "loss": 0.0962, "step": 1415 }, { "clip_ratio/high_max": 0.0027193506684852764, "clip_ratio/high_mean": 0.0009277365134039428, "clip_ratio/low_mean": 0.0017397388292010874, "clip_ratio/low_min": 0.00011705312954291003, "clip_ratio/region_mean": 0.0026674753389670514, "epoch": 0.132159669600826, "grad_norm": 1.3632886409759521, "learning_rate": 2e-07, "loss": 0.1083, "step": 1416 }, { "clip_ratio/high_max": 0.0020166500471532345, "clip_ratio/high_mean": 0.0007231217823573388, "clip_ratio/low_mean": 0.002787073281069752, "clip_ratio/low_min": 0.0003263078506279271, "clip_ratio/region_mean": 0.003510195092530921, "epoch": 0.13225300270082657, "grad_norm": 0.9144637584686279, "learning_rate": 2e-07, "loss": 0.0776, "step": 1417 }, { "clip_ratio/high_max": 0.002979688288178295, "clip_ratio/high_mean": 0.0008559236321161734, "clip_ratio/low_mean": 0.002458118793583708, "clip_ratio/low_min": 0.00015756663469801424, "clip_ratio/region_mean": 0.0033140424638986588, "epoch": 0.13234633580082716, "grad_norm": 1.4139240980148315, "learning_rate": 2e-07, "loss": 0.101, "step": 1418 }, { "clip_ratio/high_max": 0.0034353812670815387, "clip_ratio/high_mean": 0.0008467616032703518, "clip_ratio/low_mean": 0.0025635969650465995, "clip_ratio/low_min": 0.00016716705567887402, "clip_ratio/region_mean": 0.003410358702240046, "epoch": 0.13243966890082776, "grad_norm": 0.9239667654037476, "learning_rate": 2e-07, "loss": 0.0701, "step": 1419 }, { "clip_ratio/high_max": 0.004143041245697532, "clip_ratio/high_mean": 0.0011480730445327936, "clip_ratio/low_mean": 0.0025625305133871734, "clip_ratio/low_min": 0.00026376529967819806, "clip_ratio/region_mean": 0.0037106035160832107, "epoch": 0.13253300200082832, "grad_norm": 2.1368558406829834, "learning_rate": 2e-07, "loss": 0.075, "step": 1420 }, { "clip_ratio/high_max": 0.003548359491105657, "clip_ratio/high_mean": 0.0010898371883740765, "clip_ratio/low_mean": 0.003178080325596966, "clip_ratio/low_min": 0.0004715784998552408, "clip_ratio/region_mean": 0.004267917407560162, "epoch": 0.1326263351008289, "grad_norm": 552.5911865234375, "learning_rate": 2e-07, "loss": 0.1456, "step": 1421 }, { "clip_ratio/high_max": 0.004039390143589117, "clip_ratio/high_mean": 0.001063418233570701, "clip_ratio/low_mean": 0.002983271326229442, "clip_ratio/low_min": 0.0004501670628087595, "clip_ratio/region_mean": 0.004046689529786818, "epoch": 0.1327196682008295, "grad_norm": 2.0642826557159424, "learning_rate": 2e-07, "loss": 0.0789, "step": 1422 }, { "clip_ratio/high_max": 0.0031615907719242387, "clip_ratio/high_mean": 0.000991053699181066, "clip_ratio/low_mean": 0.0032869467468117364, "clip_ratio/low_min": 0.00045175705326983007, "clip_ratio/region_mean": 0.004278000502381474, "epoch": 0.13281300130083007, "grad_norm": 2.024412155151367, "learning_rate": 2e-07, "loss": 0.0689, "step": 1423 }, { "clip_ratio/high_max": 0.003114478058705572, "clip_ratio/high_mean": 0.0009738494773046114, "clip_ratio/low_mean": 0.00278983851603698, "clip_ratio/low_min": 0.00022726818224327872, "clip_ratio/region_mean": 0.003763688000617549, "epoch": 0.13290633440083066, "grad_norm": 183.63143920898438, "learning_rate": 2e-07, "loss": 0.0984, "step": 1424 }, { "clip_ratio/high_max": 0.00428938015102176, "clip_ratio/high_mean": 0.0013461861744872294, "clip_ratio/low_mean": 0.0026578068573144265, "clip_ratio/low_min": 0.0001812715086089156, "clip_ratio/region_mean": 0.004003992929938249, "epoch": 0.13299966750083125, "grad_norm": 15.79616641998291, "learning_rate": 2e-07, "loss": 0.0864, "step": 1425 }, { "clip_ratio/high_max": 0.003437613297137432, "clip_ratio/high_mean": 0.00103465749816678, "clip_ratio/low_mean": 0.0031517129682470113, "clip_ratio/low_min": 0.0003390138936083531, "clip_ratio/region_mean": 0.004186370468232781, "epoch": 0.13309300060083182, "grad_norm": 3.787747383117676, "learning_rate": 2e-07, "loss": 0.0947, "step": 1426 }, { "clip_ratio/high_max": 0.002753906181169441, "clip_ratio/high_mean": 0.0008410741666011745, "clip_ratio/low_mean": 0.003531698872393463, "clip_ratio/low_min": 0.00036665747302322416, "clip_ratio/region_mean": 0.0043727730080718175, "epoch": 0.1331863337008324, "grad_norm": 3.0714917182922363, "learning_rate": 2e-07, "loss": 0.0946, "step": 1427 }, { "clip_ratio/high_max": 0.004165698293945752, "clip_ratio/high_mean": 0.001297348773732665, "clip_ratio/low_mean": 0.003802986473601777, "clip_ratio/low_min": 0.0004603304605552694, "clip_ratio/region_mean": 0.005100335270981304, "epoch": 0.133279666800833, "grad_norm": 22785.83984375, "learning_rate": 2e-07, "loss": 0.6319, "step": 1428 }, { "clip_ratio/high_max": 0.0024231291390606202, "clip_ratio/high_mean": 0.0007571321166324196, "clip_ratio/low_mean": 0.004258909320924431, "clip_ratio/low_min": 0.00042251830473105656, "clip_ratio/region_mean": 0.005016041497583501, "epoch": 0.1333729999008336, "grad_norm": 145.3633270263672, "learning_rate": 2e-07, "loss": 0.1133, "step": 1429 }, { "clip_ratio/high_max": 0.003018751896888716, "clip_ratio/high_mean": 0.0009828585061768536, "clip_ratio/low_mean": 0.003883214885718189, "clip_ratio/low_min": 0.000307269405311672, "clip_ratio/region_mean": 0.0048660733591532335, "epoch": 0.13346633300083416, "grad_norm": 2.4388821125030518, "learning_rate": 2e-07, "loss": 0.0961, "step": 1430 }, { "clip_ratio/high_max": 0.00415704473562073, "clip_ratio/high_mean": 0.0011983077292825328, "clip_ratio/low_mean": 0.005105378266307525, "clip_ratio/low_min": 0.0004039287468913244, "clip_ratio/region_mean": 0.006303685906459577, "epoch": 0.13355966610083475, "grad_norm": 19.223356246948242, "learning_rate": 2e-07, "loss": 0.0865, "step": 1431 }, { "clip_ratio/high_max": 0.004481497213419061, "clip_ratio/high_mean": 0.001327207506619743, "clip_ratio/low_mean": 0.004572874662699178, "clip_ratio/low_min": 0.0006425259962270502, "clip_ratio/region_mean": 0.005900082207517698, "epoch": 0.13365299920083534, "grad_norm": 17.14619255065918, "learning_rate": 2e-07, "loss": 0.0912, "step": 1432 }, { "clip_ratio/high_max": 0.004286933719413355, "clip_ratio/high_mean": 0.00131725390565407, "clip_ratio/low_mean": 0.004819051879167091, "clip_ratio/low_min": 0.0006973019849283446, "clip_ratio/region_mean": 0.006136305775726214, "epoch": 0.1337463323008359, "grad_norm": 62.88805389404297, "learning_rate": 2e-07, "loss": 0.1339, "step": 1433 }, { "clip_ratio/high_max": 0.003971074285800569, "clip_ratio/high_mean": 0.0013571663384936983, "clip_ratio/low_mean": 0.00425802241807105, "clip_ratio/low_min": 0.0008193005996872671, "clip_ratio/region_mean": 0.005615188696538098, "epoch": 0.1338396654008365, "grad_norm": 243487.8125, "learning_rate": 2e-07, "loss": 7.0422, "step": 1434 }, { "clip_ratio/high_max": 0.005954889660642948, "clip_ratio/high_mean": 0.0016240957629634067, "clip_ratio/low_mean": 0.005316458584275097, "clip_ratio/low_min": 0.00036492736217041966, "clip_ratio/region_mean": 0.006940554405446164, "epoch": 0.1339329985008371, "grad_norm": 231.15589904785156, "learning_rate": 2e-07, "loss": 0.0913, "step": 1435 }, { "clip_ratio/high_max": 0.004649173417419661, "clip_ratio/high_mean": 0.0013758723052887945, "clip_ratio/low_mean": 0.0038631097122561187, "clip_ratio/low_min": 0.00037210579102975316, "clip_ratio/region_mean": 0.005238982063019648, "epoch": 0.13402633160083766, "grad_norm": 1515.633056640625, "learning_rate": 2e-07, "loss": 0.1328, "step": 1436 }, { "clip_ratio/high_max": 0.005194587065489031, "clip_ratio/high_mean": 0.0016361588204745203, "clip_ratio/low_mean": 0.005354176755645312, "clip_ratio/low_min": 0.00024152064725058153, "clip_ratio/region_mean": 0.006990335648879409, "epoch": 0.13411966470083825, "grad_norm": 350.9219055175781, "learning_rate": 2e-07, "loss": 0.3242, "step": 1437 }, { "clip_ratio/high_max": 0.004842477948841406, "clip_ratio/high_mean": 0.0014230941214918857, "clip_ratio/low_mean": 0.005127488628204446, "clip_ratio/low_min": 0.00017847353501565522, "clip_ratio/region_mean": 0.00655058266420383, "epoch": 0.13421299780083884, "grad_norm": 38.4141960144043, "learning_rate": 2e-07, "loss": 0.1048, "step": 1438 }, { "clip_ratio/high_max": 0.010436508571729064, "clip_ratio/high_mean": 0.0026675398839870468, "clip_ratio/low_mean": 0.004330304553150199, "clip_ratio/low_min": 0.0002762263065960724, "clip_ratio/region_mean": 0.006997844422585331, "epoch": 0.1343063309008394, "grad_norm": 8097.30224609375, "learning_rate": 2e-07, "loss": 0.3944, "step": 1439 }, { "clip_ratio/high_max": 0.005507613372174092, "clip_ratio/high_mean": 0.0017703950543364044, "clip_ratio/low_mean": 0.004967420121829491, "clip_ratio/low_min": 0.00020942808077961672, "clip_ratio/region_mean": 0.00673781504156068, "epoch": 0.13439966400084, "grad_norm": 15.0077543258667, "learning_rate": 2e-07, "loss": 0.0849, "step": 1440 }, { "clip_ratio/high_max": 0.008388139598537236, "clip_ratio/high_mean": 0.0024348700244445354, "clip_ratio/low_mean": 0.004160194388532545, "clip_ratio/low_min": 0.0002790682738122996, "clip_ratio/region_mean": 0.006595064289285801, "epoch": 0.1344929971008406, "grad_norm": 16477.44921875, "learning_rate": 2e-07, "loss": 0.8173, "step": 1441 }, { "clip_ratio/high_max": 0.006202811571711209, "clip_ratio/high_mean": 0.0018789043388096616, "clip_ratio/low_mean": 0.004534453313681297, "clip_ratio/low_min": 0.00034072730068146484, "clip_ratio/region_mean": 0.0064133577106986195, "epoch": 0.13458633020084115, "grad_norm": 810.54443359375, "learning_rate": 2e-07, "loss": 0.1359, "step": 1442 }, { "clip_ratio/high_max": 0.006806824603700079, "clip_ratio/high_mean": 0.0017695105525490362, "clip_ratio/low_mean": 0.005506634224730078, "clip_ratio/low_min": 0.00065718778205337, "clip_ratio/region_mean": 0.007276144708157517, "epoch": 0.13467966330084175, "grad_norm": 1556.5670166015625, "learning_rate": 2e-07, "loss": 0.176, "step": 1443 }, { "clip_ratio/high_max": 0.007051138796668965, "clip_ratio/high_mean": 0.0022924738659639843, "clip_ratio/low_mean": 0.004710242588771507, "clip_ratio/low_min": 0.0005501996647581109, "clip_ratio/region_mean": 0.007002716578426771, "epoch": 0.13477299640084234, "grad_norm": 647.2376708984375, "learning_rate": 2e-07, "loss": 0.139, "step": 1444 }, { "clip_ratio/high_max": 0.005365365213947371, "clip_ratio/high_mean": 0.0014938992444513133, "clip_ratio/low_mean": 0.0052102112385910004, "clip_ratio/low_min": 0.000979726577497786, "clip_ratio/region_mean": 0.006704110302962363, "epoch": 0.1348663295008429, "grad_norm": 102.38212585449219, "learning_rate": 2e-07, "loss": 0.1308, "step": 1445 }, { "clip_ratio/high_max": 0.008468388914479874, "clip_ratio/high_mean": 0.002404698185273446, "clip_ratio/low_mean": 0.004492271735216491, "clip_ratio/low_min": 0.0006960736682231072, "clip_ratio/region_mean": 0.006896969804074615, "epoch": 0.1349596626008435, "grad_norm": 420.0313720703125, "learning_rate": 2e-07, "loss": 0.1058, "step": 1446 }, { "clip_ratio/high_max": 0.004974880619556643, "clip_ratio/high_mean": 0.0015506986710533965, "clip_ratio/low_mean": 0.005655403023411054, "clip_ratio/low_min": 0.0003711468671099283, "clip_ratio/region_mean": 0.0072061015671351925, "epoch": 0.1350529957008441, "grad_norm": 7671.556640625, "learning_rate": 2e-07, "loss": 0.3504, "step": 1447 }, { "clip_ratio/high_max": 0.007234136359329568, "clip_ratio/high_mean": 0.001955603372607584, "clip_ratio/low_mean": 0.005161738707101904, "clip_ratio/low_min": 0.0008712913258932531, "clip_ratio/region_mean": 0.00711734194192104, "epoch": 0.13514632880084468, "grad_norm": 117.27584075927734, "learning_rate": 2e-07, "loss": 0.1024, "step": 1448 }, { "clip_ratio/high_max": 0.006977778917644173, "clip_ratio/high_mean": 0.0023631368931091856, "clip_ratio/low_mean": 0.004765874553413596, "clip_ratio/low_min": 0.00035495298288878985, "clip_ratio/region_mean": 0.0071290116175077856, "epoch": 0.13523966190084524, "grad_norm": 1065639168.0, "learning_rate": 2e-07, "loss": 17387.8965, "step": 1449 }, { "clip_ratio/high_max": 0.005588789965258911, "clip_ratio/high_mean": 0.0016506532701896504, "clip_ratio/low_mean": 0.005156362938578241, "clip_ratio/low_min": 0.00043442121022962965, "clip_ratio/region_mean": 0.006807016354287043, "epoch": 0.13533299500084583, "grad_norm": 2551.2138671875, "learning_rate": 2e-07, "loss": 0.1829, "step": 1450 }, { "clip_ratio/high_max": 0.0067286554258316755, "clip_ratio/high_mean": 0.0019457521266303957, "clip_ratio/low_mean": 0.006285369134275243, "clip_ratio/low_min": 0.0004948289897583891, "clip_ratio/region_mean": 0.008231121086282656, "epoch": 0.13542632810084643, "grad_norm": 817.2671508789062, "learning_rate": 2e-07, "loss": 0.1417, "step": 1451 }, { "clip_ratio/high_max": 0.006912979333719704, "clip_ratio/high_mean": 0.0022599385847570375, "clip_ratio/low_mean": 0.004771615727804601, "clip_ratio/low_min": 0.00039299659692915156, "clip_ratio/region_mean": 0.007031554196146317, "epoch": 0.135519661200847, "grad_norm": 144.7241668701172, "learning_rate": 2e-07, "loss": 0.1248, "step": 1452 }, { "clip_ratio/high_max": 0.005359833252441604, "clip_ratio/high_mean": 0.0017729559494910063, "clip_ratio/low_mean": 0.007604939877637662, "clip_ratio/low_min": 0.0008173747446562629, "clip_ratio/region_mean": 0.009377895767102018, "epoch": 0.13561299430084758, "grad_norm": 325.6852111816406, "learning_rate": 2e-07, "loss": 0.1816, "step": 1453 }, { "clip_ratio/high_max": 0.004857336993154604, "clip_ratio/high_mean": 0.0020133468169660773, "clip_ratio/low_mean": 0.005867306841537356, "clip_ratio/low_min": 0.0009200707841046096, "clip_ratio/region_mean": 0.007880653807660565, "epoch": 0.13570632740084818, "grad_norm": 189.5345916748047, "learning_rate": 2e-07, "loss": 0.1562, "step": 1454 }, { "clip_ratio/high_max": 0.006058206534362398, "clip_ratio/high_mean": 0.002074853611702565, "clip_ratio/low_mean": 0.005655181797919795, "clip_ratio/low_min": 0.00047107947466429323, "clip_ratio/region_mean": 0.007730035358690657, "epoch": 0.13579966050084874, "grad_norm": 2778336.0, "learning_rate": 2e-07, "loss": 49.2133, "step": 1455 }, { "clip_ratio/high_max": 0.005761111759056803, "clip_ratio/high_mean": 0.0018612701951497002, "clip_ratio/low_mean": 0.004519296417129226, "clip_ratio/low_min": 0.00038266646515694447, "clip_ratio/region_mean": 0.006380566614097916, "epoch": 0.13589299360084933, "grad_norm": 5004.412109375, "learning_rate": 2e-07, "loss": 0.2914, "step": 1456 }, { "clip_ratio/high_max": 0.006787259022530634, "clip_ratio/high_mean": 0.0019501526694511995, "clip_ratio/low_mean": 0.0064736987187643535, "clip_ratio/low_min": 0.0007250098715303466, "clip_ratio/region_mean": 0.008423851046245545, "epoch": 0.13598632670084992, "grad_norm": 334.9818420410156, "learning_rate": 2e-07, "loss": 0.1282, "step": 1457 }, { "clip_ratio/high_max": 0.0068399918418435846, "clip_ratio/high_mean": 0.0016516001628588128, "clip_ratio/low_mean": 0.004827113407372963, "clip_ratio/low_min": 0.0007999262597877532, "clip_ratio/region_mean": 0.00647871351975482, "epoch": 0.1360796598008505, "grad_norm": 1324.082275390625, "learning_rate": 2e-07, "loss": 0.1698, "step": 1458 }, { "clip_ratio/high_max": 0.006564840652572457, "clip_ratio/high_mean": 0.0022351597090164432, "clip_ratio/low_mean": 0.0057496990921208635, "clip_ratio/low_min": 0.0005617207316390704, "clip_ratio/region_mean": 0.007984858966665342, "epoch": 0.13617299290085108, "grad_norm": 1144.6529541015625, "learning_rate": 2e-07, "loss": 0.1495, "step": 1459 }, { "clip_ratio/high_max": 0.007493828758015297, "clip_ratio/high_mean": 0.002173895605665166, "clip_ratio/low_mean": 0.005498423081007786, "clip_ratio/low_min": 0.00048703958600526676, "clip_ratio/region_mean": 0.007672318868571892, "epoch": 0.13626632600085167, "grad_norm": 135.51280212402344, "learning_rate": 2e-07, "loss": 0.1302, "step": 1460 }, { "clip_ratio/high_max": 0.008647413400467485, "clip_ratio/high_mean": 0.0024197340826503932, "clip_ratio/low_mean": 0.00645587412873283, "clip_ratio/low_min": 0.0010710514234233415, "clip_ratio/region_mean": 0.008875608153175563, "epoch": 0.13635965910085224, "grad_norm": 2367.09375, "learning_rate": 2e-07, "loss": 0.229, "step": 1461 }, { "clip_ratio/high_max": 0.009749329241458327, "clip_ratio/high_mean": 0.003116860279988032, "clip_ratio/low_mean": 0.005873062626051251, "clip_ratio/low_min": 0.000810243480373174, "clip_ratio/region_mean": 0.008989922949695028, "epoch": 0.13645299220085283, "grad_norm": 10801.2080078125, "learning_rate": 2e-07, "loss": 0.5577, "step": 1462 }, { "clip_ratio/high_max": 0.007331577799050137, "clip_ratio/high_mean": 0.0023855306881159777, "clip_ratio/low_mean": 0.0055911670497152954, "clip_ratio/low_min": 0.0004712052013928769, "clip_ratio/region_mean": 0.007976697743288241, "epoch": 0.13654632530085342, "grad_norm": 100856.0625, "learning_rate": 2e-07, "loss": 1.1465, "step": 1463 }, { "clip_ratio/high_max": 0.009189326759951655, "clip_ratio/high_mean": 0.0026437763845024165, "clip_ratio/low_mean": 0.0058936806744895875, "clip_ratio/low_min": 0.0007874863331380766, "clip_ratio/region_mean": 0.008537457091733813, "epoch": 0.13663965840085399, "grad_norm": 6278.16064453125, "learning_rate": 2e-07, "loss": 0.261, "step": 1464 }, { "clip_ratio/high_max": 0.010295358617440797, "clip_ratio/high_mean": 0.002946512668131618, "clip_ratio/low_mean": 0.005151014556759037, "clip_ratio/low_min": 0.0006401431728590978, "clip_ratio/region_mean": 0.008097527403151616, "epoch": 0.13673299150085458, "grad_norm": 10188.6435546875, "learning_rate": 2e-07, "loss": 0.3585, "step": 1465 }, { "clip_ratio/high_max": 0.008779369585681707, "clip_ratio/high_mean": 0.002560641551099252, "clip_ratio/low_mean": 0.005750521719164681, "clip_ratio/low_min": 0.00040780836570775136, "clip_ratio/region_mean": 0.008311163299367763, "epoch": 0.13682632460085517, "grad_norm": 226815232.0, "learning_rate": 2e-07, "loss": 13867.4434, "step": 1466 }, { "clip_ratio/high_max": 0.007305313571123406, "clip_ratio/high_mean": 0.0022002926598361228, "clip_ratio/low_mean": 0.004813257080968469, "clip_ratio/low_min": 0.0002513038016331848, "clip_ratio/region_mean": 0.007013549853581935, "epoch": 0.13691965770085573, "grad_norm": 3517.51611328125, "learning_rate": 2e-07, "loss": 0.1704, "step": 1467 }, { "clip_ratio/high_max": 0.007766131166135892, "clip_ratio/high_mean": 0.002154123372747563, "clip_ratio/low_mean": 0.006541856360854581, "clip_ratio/low_min": 0.0003070117181778187, "clip_ratio/region_mean": 0.008695979864569381, "epoch": 0.13701299080085633, "grad_norm": 5785.6201171875, "learning_rate": 2e-07, "loss": 0.3348, "step": 1468 }, { "clip_ratio/high_max": 0.005988854689348955, "clip_ratio/high_mean": 0.0018364719653618522, "clip_ratio/low_mean": 0.005896807721001096, "clip_ratio/low_min": 0.0004301241187931737, "clip_ratio/region_mean": 0.007733279722742736, "epoch": 0.13710632390085692, "grad_norm": 2611.37353515625, "learning_rate": 2e-07, "loss": 0.1774, "step": 1469 }, { "clip_ratio/high_max": 0.007534066106018145, "clip_ratio/high_mean": 0.002155297846911708, "clip_ratio/low_mean": 0.00626742652093526, "clip_ratio/low_min": 0.0011977599065176037, "clip_ratio/region_mean": 0.008422724233241752, "epoch": 0.1371996570008575, "grad_norm": 166345.890625, "learning_rate": 2e-07, "loss": 5.1383, "step": 1470 }, { "clip_ratio/high_max": 0.008879992747097276, "clip_ratio/high_mean": 0.002703597681829706, "clip_ratio/low_mean": 0.005348357393813785, "clip_ratio/low_min": 0.0007206140362541191, "clip_ratio/region_mean": 0.008051954777329229, "epoch": 0.13729299010085808, "grad_norm": 443.6673583984375, "learning_rate": 2e-07, "loss": 0.3003, "step": 1471 }, { "clip_ratio/high_max": 0.008881206857040524, "clip_ratio/high_mean": 0.0024145210772985592, "clip_ratio/low_mean": 0.007276998338056728, "clip_ratio/low_min": 0.001335394976194948, "clip_ratio/region_mean": 0.009691519488114864, "epoch": 0.13738632320085867, "grad_norm": 73148.5234375, "learning_rate": 2e-07, "loss": 7.8584, "step": 1472 }, { "clip_ratio/high_max": 0.008089089635177515, "clip_ratio/high_mean": 0.0021771364481537603, "clip_ratio/low_mean": 0.005132714475621469, "clip_ratio/low_min": 0.0003153640282107517, "clip_ratio/region_mean": 0.007309851018362679, "epoch": 0.13747965630085926, "grad_norm": 325.13897705078125, "learning_rate": 2e-07, "loss": 0.1234, "step": 1473 }, { "clip_ratio/high_max": 0.009867451473837718, "clip_ratio/high_mean": 0.002715134836762445, "clip_ratio/low_mean": 0.0059342460735933855, "clip_ratio/low_min": 0.00044754139162250794, "clip_ratio/region_mean": 0.008649381081340834, "epoch": 0.13757298940085982, "grad_norm": 10917.28125, "learning_rate": 2e-07, "loss": 0.3105, "step": 1474 }, { "clip_ratio/high_max": 0.009670020459452644, "clip_ratio/high_mean": 0.0027035189468733734, "clip_ratio/low_mean": 0.0054028749000281096, "clip_ratio/low_min": 0.0005357317677407991, "clip_ratio/region_mean": 0.008106393957859837, "epoch": 0.13766632250086042, "grad_norm": 7847.5185546875, "learning_rate": 2e-07, "loss": 0.4024, "step": 1475 }, { "clip_ratio/high_max": 0.007240221817482961, "clip_ratio/high_mean": 0.0021153762172616553, "clip_ratio/low_mean": 0.005755964710260741, "clip_ratio/low_min": 0.0006380534032359719, "clip_ratio/region_mean": 0.007871341076679528, "epoch": 0.137759655600861, "grad_norm": 735.4156494140625, "learning_rate": 2e-07, "loss": 0.12, "step": 1476 }, { "clip_ratio/high_max": 0.008236524234234821, "clip_ratio/high_mean": 0.0026749814005597727, "clip_ratio/low_mean": 0.005593385372776538, "clip_ratio/low_min": 0.0006847293261671439, "clip_ratio/region_mean": 0.008268366858828813, "epoch": 0.13785298870086157, "grad_norm": 21737697280.0, "learning_rate": 2e-07, "loss": 250428.8125, "step": 1477 }, { "clip_ratio/high_max": 0.006871360579680186, "clip_ratio/high_mean": 0.002191756750107743, "clip_ratio/low_mean": 0.005869777814950794, "clip_ratio/low_min": 0.0017172887346532661, "clip_ratio/region_mean": 0.008061534652370028, "epoch": 0.13794632180086216, "grad_norm": 2831.79736328125, "learning_rate": 2e-07, "loss": 0.3071, "step": 1478 }, { "clip_ratio/high_max": 0.007536492339568213, "clip_ratio/high_mean": 0.002407484258583281, "clip_ratio/low_mean": 0.006683551451715175, "clip_ratio/low_min": 0.0008019790502657997, "clip_ratio/region_mean": 0.009091035753954202, "epoch": 0.13803965490086276, "grad_norm": 13872.025390625, "learning_rate": 2e-07, "loss": 0.7143, "step": 1479 }, { "clip_ratio/high_max": 0.006915603065863252, "clip_ratio/high_mean": 0.0020499253951129504, "clip_ratio/low_mean": 0.0063171433575917035, "clip_ratio/low_min": 0.0005435060265881475, "clip_ratio/region_mean": 0.008367068920051679, "epoch": 0.13813298800086332, "grad_norm": 738.7655029296875, "learning_rate": 2e-07, "loss": 0.1486, "step": 1480 }, { "clip_ratio/high_max": 0.009973643682315014, "clip_ratio/high_mean": 0.002607977126899641, "clip_ratio/low_mean": 0.005412334241555072, "clip_ratio/low_min": 0.00035683299029187765, "clip_ratio/region_mean": 0.008020311666768976, "epoch": 0.1382263211008639, "grad_norm": 46274.46875, "learning_rate": 2e-07, "loss": 2.0843, "step": 1481 }, { "clip_ratio/high_max": 0.007136008018278517, "clip_ratio/high_mean": 0.00234859628835693, "clip_ratio/low_mean": 0.006437916337745264, "clip_ratio/low_min": 0.0011354547932569403, "clip_ratio/region_mean": 0.008786512407823466, "epoch": 0.1383196542008645, "grad_norm": 2590348.75, "learning_rate": 2e-07, "loss": 1902.9944, "step": 1482 }, { "clip_ratio/high_max": 0.010188057844061404, "clip_ratio/high_mean": 0.0029102276712364983, "clip_ratio/low_mean": 0.005978313995001372, "clip_ratio/low_min": 0.0005611587039311416, "clip_ratio/region_mean": 0.008888541749911383, "epoch": 0.13841298730086507, "grad_norm": 395.4180908203125, "learning_rate": 2e-07, "loss": 0.1518, "step": 1483 }, { "clip_ratio/high_max": 0.006695381147437729, "clip_ratio/high_mean": 0.002252870941447327, "clip_ratio/low_mean": 0.006109536057920195, "clip_ratio/low_min": 0.0010449017336213728, "clip_ratio/region_mean": 0.008362406893866137, "epoch": 0.13850632040086566, "grad_norm": 2135421.75, "learning_rate": 2e-07, "loss": 21.9956, "step": 1484 }, { "clip_ratio/high_max": 0.0073409851611359045, "clip_ratio/high_mean": 0.00249104151953361, "clip_ratio/low_mean": 0.005187874339753762, "clip_ratio/low_min": 0.0010463005673955195, "clip_ratio/region_mean": 0.007678915906581096, "epoch": 0.13859965350086625, "grad_norm": 113.12728881835938, "learning_rate": 2e-07, "loss": 0.0991, "step": 1485 }, { "clip_ratio/high_max": 0.007104978463758016, "clip_ratio/high_mean": 0.0023320781510847155, "clip_ratio/low_mean": 0.005304814112605527, "clip_ratio/low_min": 0.000515213243488688, "clip_ratio/region_mean": 0.007636892260052264, "epoch": 0.13869298660086682, "grad_norm": 93.32202911376953, "learning_rate": 2e-07, "loss": 0.1114, "step": 1486 }, { "clip_ratio/high_max": 0.008283168150228448, "clip_ratio/high_mean": 0.0023534151987405494, "clip_ratio/low_mean": 0.005452555400552228, "clip_ratio/low_min": 0.0006040840744390152, "clip_ratio/region_mean": 0.007805970861227252, "epoch": 0.1387863197008674, "grad_norm": 137090944.0, "learning_rate": 2e-07, "loss": 1623.3727, "step": 1487 }, { "clip_ratio/high_max": 0.008993372721306514, "clip_ratio/high_mean": 0.0028979981543670874, "clip_ratio/low_mean": 0.005086147852125578, "clip_ratio/low_min": 0.00031322661743615754, "clip_ratio/region_mean": 0.007984146068338305, "epoch": 0.138879652800868, "grad_norm": 3164.171875, "learning_rate": 2e-07, "loss": 0.2046, "step": 1488 }, { "clip_ratio/high_max": 0.009458193788304925, "clip_ratio/high_mean": 0.0028433490078896284, "clip_ratio/low_mean": 0.006782857395592146, "clip_ratio/low_min": 0.0011850148512166925, "clip_ratio/region_mean": 0.009626206156099215, "epoch": 0.13897298590086857, "grad_norm": 57206.9140625, "learning_rate": 2e-07, "loss": 1.2667, "step": 1489 }, { "clip_ratio/high_max": 0.008538940877770074, "clip_ratio/high_mean": 0.0031768445078341756, "clip_ratio/low_mean": 0.0065518994379090145, "clip_ratio/low_min": 0.0011620893637882546, "clip_ratio/region_mean": 0.009728743956657127, "epoch": 0.13906631900086916, "grad_norm": 915.1483764648438, "learning_rate": 2e-07, "loss": 0.0945, "step": 1490 }, { "clip_ratio/high_max": 0.005669618214596994, "clip_ratio/high_mean": 0.0016231675454037031, "clip_ratio/low_mean": 0.006512166888569482, "clip_ratio/low_min": 0.0009109943930525333, "clip_ratio/region_mean": 0.008135334486723877, "epoch": 0.13915965210086975, "grad_norm": 66290.3359375, "learning_rate": 2e-07, "loss": 1.2677, "step": 1491 }, { "clip_ratio/high_max": 0.009215493410010822, "clip_ratio/high_mean": 0.002293805588124087, "clip_ratio/low_mean": 0.004719295830000192, "clip_ratio/low_min": 0.0005806983026559465, "clip_ratio/region_mean": 0.007013101378106512, "epoch": 0.13925298520087034, "grad_norm": 3732.097900390625, "learning_rate": 2e-07, "loss": 0.1804, "step": 1492 }, { "clip_ratio/high_max": 0.00889978730265284, "clip_ratio/high_mean": 0.00275647769740317, "clip_ratio/low_mean": 0.005451131699373946, "clip_ratio/low_min": 0.001195424087200081, "clip_ratio/region_mean": 0.008207609469536692, "epoch": 0.1393463183008709, "grad_norm": 7707195.0, "learning_rate": 2e-07, "loss": 67.1128, "step": 1493 }, { "clip_ratio/high_max": 0.006558050285093486, "clip_ratio/high_mean": 0.00216191300569335, "clip_ratio/low_mean": 0.005828499037306756, "clip_ratio/low_min": 0.00047607712895114673, "clip_ratio/region_mean": 0.007990411890204996, "epoch": 0.1394396514008715, "grad_norm": 556.0816040039062, "learning_rate": 2e-07, "loss": 0.1524, "step": 1494 }, { "clip_ratio/high_max": 0.0077836069103796035, "clip_ratio/high_mean": 0.0023378397891065106, "clip_ratio/low_mean": 0.006860084584332071, "clip_ratio/low_min": 0.0005471100121212658, "clip_ratio/region_mean": 0.009197924271575175, "epoch": 0.1395329845008721, "grad_norm": 91591.8515625, "learning_rate": 2e-07, "loss": 2.206, "step": 1495 }, { "clip_ratio/high_max": 0.01026911816734355, "clip_ratio/high_mean": 0.003117845708402456, "clip_ratio/low_mean": 0.005843779159476981, "clip_ratio/low_min": 0.0008345485439349432, "clip_ratio/region_mean": 0.008961624902440235, "epoch": 0.13962631760087266, "grad_norm": 5883.73876953125, "learning_rate": 2e-07, "loss": 0.2737, "step": 1496 }, { "clip_ratio/high_max": 0.007312572153750807, "clip_ratio/high_mean": 0.0022731063145329244, "clip_ratio/low_mean": 0.005695893807569519, "clip_ratio/low_min": 0.0007808265363564715, "clip_ratio/region_mean": 0.007969000202137977, "epoch": 0.13971965070087325, "grad_norm": 24601922.0, "learning_rate": 2e-07, "loss": 899.8954, "step": 1497 }, { "clip_ratio/high_max": 0.009946754471457098, "clip_ratio/high_mean": 0.0032042041457316373, "clip_ratio/low_mean": 0.006703616978484206, "clip_ratio/low_min": 0.0004932862884743372, "clip_ratio/region_mean": 0.009907820902299136, "epoch": 0.13981298380087384, "grad_norm": 1514.8369140625, "learning_rate": 2e-07, "loss": 0.1572, "step": 1498 }, { "clip_ratio/high_max": 0.008156065683579072, "clip_ratio/high_mean": 0.0025624113768571988, "clip_ratio/low_mean": 0.0066142017603851855, "clip_ratio/low_min": 0.0007484292618755717, "clip_ratio/region_mean": 0.009176612918963656, "epoch": 0.1399063169008744, "grad_norm": 391.2881164550781, "learning_rate": 2e-07, "loss": 0.1477, "step": 1499 }, { "clip_ratio/high_max": 0.010827489662915468, "clip_ratio/high_mean": 0.0027917309962504078, "clip_ratio/low_mean": 0.007237174781039357, "clip_ratio/low_min": 0.0008522135658495245, "clip_ratio/region_mean": 0.010028905613580719, "epoch": 0.139999650000875, "grad_norm": 121.61569213867188, "learning_rate": 2e-07, "loss": 0.1374, "step": 1500 }, { "clip_ratio/high_max": 0.0081136782100657, "clip_ratio/high_mean": 0.0021284834438120015, "clip_ratio/low_mean": 0.005535592747037299, "clip_ratio/low_min": 0.000548481668374734, "clip_ratio/region_mean": 0.007664076023502275, "epoch": 0.1400929831008756, "grad_norm": 2062.8427734375, "learning_rate": 2e-07, "loss": 0.246, "step": 1501 }, { "clip_ratio/high_max": 0.011051118199247867, "clip_ratio/high_mean": 0.0031485180952586234, "clip_ratio/low_mean": 0.005606901933788322, "clip_ratio/low_min": 0.0005498235914274119, "clip_ratio/region_mean": 0.0087554199853912, "epoch": 0.14018631620087615, "grad_norm": 1902.2830810546875, "learning_rate": 2e-07, "loss": 0.1739, "step": 1502 }, { "clip_ratio/high_max": 0.010130674279935192, "clip_ratio/high_mean": 0.0031240616081049666, "clip_ratio/low_mean": 0.0064166452357312664, "clip_ratio/low_min": 0.00120318611516268, "clip_ratio/region_mean": 0.009540706989355385, "epoch": 0.14027964930087675, "grad_norm": 14640.6650390625, "learning_rate": 2e-07, "loss": 0.5543, "step": 1503 }, { "clip_ratio/high_max": 0.008425082065514289, "clip_ratio/high_mean": 0.002371139169554226, "clip_ratio/low_mean": 0.006036642080289312, "clip_ratio/low_min": 0.0004800515980605269, "clip_ratio/region_mean": 0.00840778139536269, "epoch": 0.14037298240087734, "grad_norm": 3871.172119140625, "learning_rate": 2e-07, "loss": 0.2088, "step": 1504 }, { "clip_ratio/high_max": 0.008657270496769343, "clip_ratio/high_mean": 0.002906339032051619, "clip_ratio/low_mean": 0.006226025856449269, "clip_ratio/low_min": 0.0004049631261295872, "clip_ratio/region_mean": 0.00913236491032876, "epoch": 0.1404663155008779, "grad_norm": 7535.419921875, "learning_rate": 2e-07, "loss": 0.4838, "step": 1505 }, { "clip_ratio/high_max": 0.007821333303581923, "clip_ratio/high_mean": 0.002561605546361534, "clip_ratio/low_mean": 0.005767920461948961, "clip_ratio/low_min": 0.0009528511000098661, "clip_ratio/region_mean": 0.00832952605560422, "epoch": 0.1405596486008785, "grad_norm": 981.5530395507812, "learning_rate": 2e-07, "loss": 0.1498, "step": 1506 }, { "clip_ratio/high_max": 0.008284735813504085, "clip_ratio/high_mean": 0.0030128685393719934, "clip_ratio/low_mean": 0.006027543422533199, "clip_ratio/low_min": 0.0007774365185468923, "clip_ratio/region_mean": 0.00904041191097349, "epoch": 0.1406529817008791, "grad_norm": 128695.6171875, "learning_rate": 2e-07, "loss": 3.1596, "step": 1507 }, { "clip_ratio/high_max": 0.011088922386988997, "clip_ratio/high_mean": 0.0029328191158128902, "clip_ratio/low_mean": 0.005733198820962571, "clip_ratio/low_min": 0.0009822351203183644, "clip_ratio/region_mean": 0.008666017965879291, "epoch": 0.14074631480087965, "grad_norm": 1932257.0, "learning_rate": 2e-07, "loss": 32.916, "step": 1508 }, { "clip_ratio/high_max": 0.010011933220084757, "clip_ratio/high_mean": 0.0031488184395129792, "clip_ratio/low_mean": 0.006271047313930467, "clip_ratio/low_min": 0.0005625791200145613, "clip_ratio/region_mean": 0.009419865877134725, "epoch": 0.14083964790088024, "grad_norm": 101780216.0, "learning_rate": 2e-07, "loss": 2019.588, "step": 1509 }, { "clip_ratio/high_max": 0.010317530031898059, "clip_ratio/high_mean": 0.002930962356913369, "clip_ratio/low_mean": 0.006052033044397831, "clip_ratio/low_min": 0.0001870120358944405, "clip_ratio/region_mean": 0.008982995495898649, "epoch": 0.14093298100088084, "grad_norm": 71608.890625, "learning_rate": 2e-07, "loss": 0.7753, "step": 1510 }, { "clip_ratio/high_max": 0.008234702530899085, "clip_ratio/high_mean": 0.0024690155260032043, "clip_ratio/low_mean": 0.006267877572099678, "clip_ratio/low_min": 0.00024385101824009325, "clip_ratio/region_mean": 0.008736893098102883, "epoch": 0.14102631410088143, "grad_norm": 170235.46875, "learning_rate": 2e-07, "loss": 6.2341, "step": 1511 }, { "clip_ratio/high_max": 0.00928757048677653, "clip_ratio/high_mean": 0.002830470531989704, "clip_ratio/low_mean": 0.006002189940772951, "clip_ratio/low_min": 0.00033641987829469144, "clip_ratio/region_mean": 0.008832660474581644, "epoch": 0.141119647200882, "grad_norm": 9152.591796875, "learning_rate": 2e-07, "loss": 0.6037, "step": 1512 }, { "clip_ratio/high_max": 0.006777602873626165, "clip_ratio/high_mean": 0.0019467606271064142, "clip_ratio/low_mean": 0.006000280773150735, "clip_ratio/low_min": 0.000853325114803738, "clip_ratio/region_mean": 0.007947041623992845, "epoch": 0.14121298030088258, "grad_norm": 394326.5625, "learning_rate": 2e-07, "loss": 7.1667, "step": 1513 }, { "clip_ratio/high_max": 0.009296341973822564, "clip_ratio/high_mean": 0.00279895230778493, "clip_ratio/low_mean": 0.0075139227556064725, "clip_ratio/low_min": 0.0012116295874875505, "clip_ratio/region_mean": 0.010312875063391402, "epoch": 0.14130631340088318, "grad_norm": 16538.107421875, "learning_rate": 2e-07, "loss": 0.8827, "step": 1514 }, { "clip_ratio/high_max": 0.00968799258407671, "clip_ratio/high_mean": 0.002912926909630187, "clip_ratio/low_mean": 0.006292820238741115, "clip_ratio/low_min": 0.0006654458593402524, "clip_ratio/region_mean": 0.009205747221130878, "epoch": 0.14139964650088374, "grad_norm": 9471.0537109375, "learning_rate": 2e-07, "loss": 0.5107, "step": 1515 }, { "clip_ratio/high_max": 0.007081362971803173, "clip_ratio/high_mean": 0.0022146700976009015, "clip_ratio/low_mean": 0.007093589854775928, "clip_ratio/low_min": 0.0010639836837071925, "clip_ratio/region_mean": 0.009308260108809918, "epoch": 0.14149297960088433, "grad_norm": 14725.2978515625, "learning_rate": 2e-07, "loss": 0.626, "step": 1516 }, { "clip_ratio/high_max": 0.008661045256303623, "clip_ratio/high_mean": 0.002396198060523602, "clip_ratio/low_mean": 0.005552623959374614, "clip_ratio/low_min": 0.00036145358899375424, "clip_ratio/region_mean": 0.007948821788886562, "epoch": 0.14158631270088493, "grad_norm": 6603321344.0, "learning_rate": 2e-07, "loss": 154126.8281, "step": 1517 }, { "clip_ratio/high_max": 0.00894778678775765, "clip_ratio/high_mean": 0.002777270332444459, "clip_ratio/low_mean": 0.0058517157231108285, "clip_ratio/low_min": 0.0005723015892726835, "clip_ratio/region_mean": 0.008628986179246567, "epoch": 0.1416796458008855, "grad_norm": 655.6632690429688, "learning_rate": 2e-07, "loss": 0.1233, "step": 1518 }, { "clip_ratio/high_max": 0.011683010030537844, "clip_ratio/high_mean": 0.002982062622322701, "clip_ratio/low_mean": 0.005868957348866388, "clip_ratio/low_min": 0.0005655200802721083, "clip_ratio/region_mean": 0.008851020160363987, "epoch": 0.14177297890088608, "grad_norm": 4108.5869140625, "learning_rate": 2e-07, "loss": 0.3282, "step": 1519 }, { "clip_ratio/high_max": 0.009060731885256246, "clip_ratio/high_mean": 0.002882993663661182, "clip_ratio/low_mean": 0.006500883842818439, "clip_ratio/low_min": 0.0007748263869871153, "clip_ratio/region_mean": 0.009383877506479621, "epoch": 0.14186631200088667, "grad_norm": 486915.40625, "learning_rate": 2e-07, "loss": 4.2782, "step": 1520 }, { "clip_ratio/high_max": 0.0076965129119344056, "clip_ratio/high_mean": 0.00240215029407409, "clip_ratio/low_mean": 0.006893843208672479, "clip_ratio/low_min": 0.0004763833676406648, "clip_ratio/region_mean": 0.009295993309933692, "epoch": 0.14195964510088724, "grad_norm": 4495.9638671875, "learning_rate": 2e-07, "loss": 0.2884, "step": 1521 }, { "clip_ratio/high_max": 0.008645318142953329, "clip_ratio/high_mean": 0.002517766995879356, "clip_ratio/low_mean": 0.006513162807095796, "clip_ratio/low_min": 0.0010720892714743968, "clip_ratio/region_mean": 0.009030929912114516, "epoch": 0.14205297820088783, "grad_norm": 99.81855773925781, "learning_rate": 2e-07, "loss": 0.1018, "step": 1522 }, { "clip_ratio/high_max": 0.00824895515688695, "clip_ratio/high_mean": 0.0025654626115283463, "clip_ratio/low_mean": 0.006599472995731048, "clip_ratio/low_min": 0.0006309193122433499, "clip_ratio/region_mean": 0.009164935851003975, "epoch": 0.14214631130088842, "grad_norm": 146747968.0, "learning_rate": 2e-07, "loss": 1866.9125, "step": 1523 }, { "clip_ratio/high_max": 0.010064619942568243, "clip_ratio/high_mean": 0.00298382701294031, "clip_ratio/low_mean": 0.007671121944440529, "clip_ratio/low_min": 0.0009944448429450858, "clip_ratio/region_mean": 0.010654948971932754, "epoch": 0.142239644400889, "grad_norm": 3083.24853515625, "learning_rate": 2e-07, "loss": 0.2349, "step": 1524 }, { "clip_ratio/high_max": 0.008845160758937709, "clip_ratio/high_mean": 0.002780696016998263, "clip_ratio/low_mean": 0.0065067932300735265, "clip_ratio/low_min": 0.000947483040363295, "clip_ratio/region_mean": 0.009287489228881896, "epoch": 0.14233297750088958, "grad_norm": 21589.935546875, "learning_rate": 2e-07, "loss": 1.1667, "step": 1525 }, { "clip_ratio/high_max": 0.009768448908289429, "clip_ratio/high_mean": 0.0029229416395537555, "clip_ratio/low_mean": 0.006703561943140812, "clip_ratio/low_min": 0.0009810920591917238, "clip_ratio/region_mean": 0.009626503800973296, "epoch": 0.14242631060089017, "grad_norm": 8000.0712890625, "learning_rate": 2e-07, "loss": 0.6816, "step": 1526 }, { "clip_ratio/high_max": 0.00900485587771982, "clip_ratio/high_mean": 0.002676327021617908, "clip_ratio/low_mean": 0.007134319545002654, "clip_ratio/low_min": 0.000811913605502923, "clip_ratio/region_mean": 0.009810646646656096, "epoch": 0.14251964370089074, "grad_norm": 73758.578125, "learning_rate": 2e-07, "loss": 3.4707, "step": 1527 }, { "clip_ratio/high_max": 0.009686201301519759, "clip_ratio/high_mean": 0.003278753072663676, "clip_ratio/low_mean": 0.006450626082369126, "clip_ratio/low_min": 0.00015682379671488889, "clip_ratio/region_mean": 0.009729379002237692, "epoch": 0.14261297680089133, "grad_norm": 807.4011840820312, "learning_rate": 2e-07, "loss": 0.1693, "step": 1528 }, { "clip_ratio/high_max": 0.007009586464846507, "clip_ratio/high_mean": 0.002023140790697653, "clip_ratio/low_mean": 0.005424646835308522, "clip_ratio/low_min": 0.0004429418868312496, "clip_ratio/region_mean": 0.00744778745865915, "epoch": 0.14270630990089192, "grad_norm": 61271.99609375, "learning_rate": 2e-07, "loss": 1.3766, "step": 1529 }, { "clip_ratio/high_max": 0.010643712928867899, "clip_ratio/high_mean": 0.002818359855154995, "clip_ratio/low_mean": 0.006773369270376861, "clip_ratio/low_min": 0.00038101487643871224, "clip_ratio/region_mean": 0.009591729118255898, "epoch": 0.14279964300089248, "grad_norm": 2024.4600830078125, "learning_rate": 2e-07, "loss": 0.2084, "step": 1530 }, { "clip_ratio/high_max": 0.009214299556333572, "clip_ratio/high_mean": 0.0031974725243344437, "clip_ratio/low_mean": 0.008182010235032067, "clip_ratio/low_min": 0.0010511075015529059, "clip_ratio/region_mean": 0.011379483068594709, "epoch": 0.14289297610089308, "grad_norm": 170745.96875, "learning_rate": 2e-07, "loss": 2.1218, "step": 1531 }, { "clip_ratio/high_max": 0.009414680651389062, "clip_ratio/high_mean": 0.0030728381061635446, "clip_ratio/low_mean": 0.0074654604541137815, "clip_ratio/low_min": 0.0008200504489650484, "clip_ratio/region_mean": 0.01053829852025956, "epoch": 0.14298630920089367, "grad_norm": 21289.640625, "learning_rate": 2e-07, "loss": 0.7571, "step": 1532 }, { "clip_ratio/high_max": 0.01055282712331973, "clip_ratio/high_mean": 0.003025742553290911, "clip_ratio/low_mean": 0.007302983169211075, "clip_ratio/low_min": 0.0006236130611796398, "clip_ratio/region_mean": 0.010328725766157731, "epoch": 0.14307964230089426, "grad_norm": 163816.78125, "learning_rate": 2e-07, "loss": 5.4718, "step": 1533 }, { "clip_ratio/high_max": 0.011372782566468231, "clip_ratio/high_mean": 0.0029087152870488353, "clip_ratio/low_mean": 0.006545605690917, "clip_ratio/low_min": 0.0008421165512118023, "clip_ratio/region_mean": 0.009454320956137963, "epoch": 0.14317297540089483, "grad_norm": 354433.9375, "learning_rate": 2e-07, "loss": 7.3524, "step": 1534 }, { "clip_ratio/high_max": 0.009848656656686217, "clip_ratio/high_mean": 0.0031519729673163965, "clip_ratio/low_mean": 0.0070607112284051254, "clip_ratio/low_min": 0.0006706478179694386, "clip_ratio/region_mean": 0.010212684137513861, "epoch": 0.14326630850089542, "grad_norm": 13342.822265625, "learning_rate": 2e-07, "loss": 0.6595, "step": 1535 }, { "clip_ratio/high_max": 0.012102187276468612, "clip_ratio/high_mean": 0.0030948711282690056, "clip_ratio/low_mean": 0.0074130619468633085, "clip_ratio/low_min": 0.0011056901566917077, "clip_ratio/region_mean": 0.010507932980544865, "epoch": 0.143359641600896, "grad_norm": 60809716.0, "learning_rate": 2e-07, "loss": 2395.5427, "step": 1536 }, { "clip_ratio/high_max": 0.002211614621046465, "clip_ratio/high_mean": 0.000705748569998832, "clip_ratio/low_mean": 0.002111162139044609, "clip_ratio/low_min": 2.3432032321579754e-05, "clip_ratio/region_mean": 0.002816910739056766, "completions/clipped_ratio": 0.10809326171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 879.2676391601562, "completions/mean_terminated_length": 489.420654296875, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.14345297470089657, "grad_norm": 8.912792205810547, "learning_rate": 2e-07, "loss": 0.0888, "num_tokens": 1170370290.0, "reward": 0.1374773383140564, "reward_std": 0.12984158098697662, "rewards/simpleverify_reward/mean": 0.1374773234128952, "rewards/simpleverify_reward/std": 0.34435197710990906, "step": 1537 }, { "clip_ratio/high_max": 0.003377609893504996, "clip_ratio/high_mean": 0.0008671264563417935, "clip_ratio/low_mean": 0.0021102764003444463, "clip_ratio/low_min": 8.471232831652742e-05, "clip_ratio/region_mean": 0.002977402815304231, "epoch": 0.14354630780089717, "grad_norm": 304.5766296386719, "learning_rate": 2e-07, "loss": 0.0585, "step": 1538 }, { "clip_ratio/high_max": 0.0026005116465057654, "clip_ratio/high_mean": 0.0007171390060989324, "clip_ratio/low_mean": 0.002032235912338365, "clip_ratio/low_min": 2.346100291106268e-05, "clip_ratio/region_mean": 0.0027493749366840348, "epoch": 0.14363964090089776, "grad_norm": 111.13668823242188, "learning_rate": 2e-07, "loss": 0.0978, "step": 1539 }, { "clip_ratio/high_max": 0.002278100657349569, "clip_ratio/high_mean": 0.0006155387409307878, "clip_ratio/low_mean": 0.0025929062394425273, "clip_ratio/low_min": 0.00032243828627542825, "clip_ratio/region_mean": 0.003208444955816958, "epoch": 0.14373297400089832, "grad_norm": 239.9208984375, "learning_rate": 2e-07, "loss": 0.134, "step": 1540 }, { "clip_ratio/high_max": 0.0024939209270087304, "clip_ratio/high_mean": 0.0006617030683173653, "clip_ratio/low_mean": 0.0019426922335696872, "clip_ratio/low_min": 2.3610215066582896e-05, "clip_ratio/region_mean": 0.0026043952857435215, "epoch": 0.14382630710089891, "grad_norm": 9.009964942932129, "learning_rate": 2e-07, "loss": 0.0746, "step": 1541 }, { "clip_ratio/high_max": 0.003134078571747523, "clip_ratio/high_mean": 0.0007966041393956402, "clip_ratio/low_mean": 0.001956724860065151, "clip_ratio/low_min": 8.228290266742988e-05, "clip_ratio/region_mean": 0.0027533290412975475, "epoch": 0.1439196402008995, "grad_norm": 164.1133575439453, "learning_rate": 2e-07, "loss": 0.0244, "step": 1542 }, { "clip_ratio/high_max": 0.0033183138402819168, "clip_ratio/high_mean": 0.0009084561879717512, "clip_ratio/low_mean": 0.0022759906642022543, "clip_ratio/low_min": 4.203094385957229e-05, "clip_ratio/region_mean": 0.003184446832165122, "epoch": 0.14401297330090007, "grad_norm": 13.685999870300293, "learning_rate": 2e-07, "loss": 0.0706, "step": 1543 }, { "clip_ratio/high_max": 0.002249881421448663, "clip_ratio/high_mean": 0.000603701439104043, "clip_ratio/low_mean": 0.0024824213614920154, "clip_ratio/low_min": 2.545327788538998e-05, "clip_ratio/region_mean": 0.003086122822423931, "epoch": 0.14410630640090066, "grad_norm": 6.723982334136963, "learning_rate": 2e-07, "loss": 0.031, "step": 1544 }, { "clip_ratio/high_max": 0.002946922613773495, "clip_ratio/high_mean": 0.0007225065901366179, "clip_ratio/low_mean": 0.002621623149025254, "clip_ratio/low_min": 9.045352271641605e-05, "clip_ratio/region_mean": 0.003344129756442271, "epoch": 0.14419963950090126, "grad_norm": 20375.833984375, "learning_rate": 2e-07, "loss": 0.6133, "step": 1545 }, { "clip_ratio/high_max": 0.0022476805788755883, "clip_ratio/high_mean": 0.000642785543732316, "clip_ratio/low_mean": 0.0026485782291274518, "clip_ratio/low_min": 4.6672266762470827e-05, "clip_ratio/region_mean": 0.0032913637332967483, "epoch": 0.14429297260090182, "grad_norm": 18.060691833496094, "learning_rate": 2e-07, "loss": 0.0299, "step": 1546 }, { "clip_ratio/high_max": 0.001954823801497696, "clip_ratio/high_mean": 0.0005951875868959178, "clip_ratio/low_mean": 0.002449255589453969, "clip_ratio/low_min": 1.5555002391920425e-05, "clip_ratio/region_mean": 0.003044443146791309, "epoch": 0.1443863057009024, "grad_norm": 55.120121002197266, "learning_rate": 2e-07, "loss": 0.0908, "step": 1547 }, { "clip_ratio/high_max": 0.0035312757827341557, "clip_ratio/high_mean": 0.0008054050349528552, "clip_ratio/low_mean": 0.004205702505714726, "clip_ratio/low_min": 7.32463231543079e-05, "clip_ratio/region_mean": 0.005011107627069578, "epoch": 0.144479638800903, "grad_norm": 2433.57080078125, "learning_rate": 2e-07, "loss": 0.1397, "step": 1548 }, { "clip_ratio/high_max": 0.002916299537901068, "clip_ratio/high_mean": 0.0007489319314117893, "clip_ratio/low_mean": 0.003125309718598146, "clip_ratio/low_min": 3.756762089324184e-05, "clip_ratio/region_mean": 0.003874241592711769, "epoch": 0.14457297190090357, "grad_norm": 24.147611618041992, "learning_rate": 2e-07, "loss": 0.0948, "step": 1549 }, { "clip_ratio/high_max": 0.004565006791381165, "clip_ratio/high_mean": 0.0013607368819066323, "clip_ratio/low_mean": 0.003185382403898984, "clip_ratio/low_min": 9.3478524377133e-05, "clip_ratio/region_mean": 0.0045461192858056165, "epoch": 0.14466630500090416, "grad_norm": 449.8078308105469, "learning_rate": 2e-07, "loss": 0.0714, "step": 1550 }, { "clip_ratio/high_max": 0.0029853354280930944, "clip_ratio/high_mean": 0.0008573930126658524, "clip_ratio/low_mean": 0.003532382288540248, "clip_ratio/low_min": 0.00013433950880425982, "clip_ratio/region_mean": 0.00438977537851315, "epoch": 0.14475963810090475, "grad_norm": 80.31497192382812, "learning_rate": 2e-07, "loss": 0.0962, "step": 1551 }, { "clip_ratio/high_max": 0.005441060227894923, "clip_ratio/high_mean": 0.0015295807188522303, "clip_ratio/low_mean": 0.0029297967412276193, "clip_ratio/low_min": 6.410304649762111e-05, "clip_ratio/region_mean": 0.0044593773782253265, "epoch": 0.14485297120090532, "grad_norm": 230446.203125, "learning_rate": 2e-07, "loss": 4.666, "step": 1552 }, { "clip_ratio/high_max": 0.004410355744767003, "clip_ratio/high_mean": 0.0011352671217537136, "clip_ratio/low_mean": 0.003462927110376768, "clip_ratio/low_min": 0.0001372272217849968, "clip_ratio/region_mean": 0.0045981942530488595, "epoch": 0.1449463043009059, "grad_norm": 297.504150390625, "learning_rate": 2e-07, "loss": 0.0615, "step": 1553 }, { "clip_ratio/high_max": 0.002862271670892369, "clip_ratio/high_mean": 0.0007584672930533998, "clip_ratio/low_mean": 0.005094815154734533, "clip_ratio/low_min": 0.000385858253139304, "clip_ratio/region_mean": 0.005853282520547509, "epoch": 0.1450396374009065, "grad_norm": 6765.0322265625, "learning_rate": 2e-07, "loss": 0.2766, "step": 1554 }, { "clip_ratio/high_max": 0.0031182668208202813, "clip_ratio/high_mean": 0.0008782948007137747, "clip_ratio/low_mean": 0.002892296528443694, "clip_ratio/low_min": 7.104596261342522e-05, "clip_ratio/region_mean": 0.00377059135644231, "epoch": 0.1451329705009071, "grad_norm": 1153.030029296875, "learning_rate": 2e-07, "loss": 0.1065, "step": 1555 }, { "clip_ratio/high_max": 0.005102578812511638, "clip_ratio/high_mean": 0.0014307714445749298, "clip_ratio/low_mean": 0.0036433572313399054, "clip_ratio/low_min": 0.00013421998028206872, "clip_ratio/region_mean": 0.005074128734122496, "epoch": 0.14522630360090766, "grad_norm": 7667.8544921875, "learning_rate": 2e-07, "loss": 0.3593, "step": 1556 }, { "clip_ratio/high_max": 0.0034624508325578063, "clip_ratio/high_mean": 0.0010738922023847408, "clip_ratio/low_mean": 0.00438159688928863, "clip_ratio/low_min": 0.0004016568691440625, "clip_ratio/region_mean": 0.0054554891685256734, "epoch": 0.14531963670090825, "grad_norm": 16536145.0, "learning_rate": 2e-07, "loss": 1526.3396, "step": 1557 }, { "clip_ratio/high_max": 0.003695548215546296, "clip_ratio/high_mean": 0.000976851378709398, "clip_ratio/low_mean": 0.0034563163098937366, "clip_ratio/low_min": 4.529960096988361e-05, "clip_ratio/region_mean": 0.004433167814568151, "epoch": 0.14541296980090884, "grad_norm": 16510.6171875, "learning_rate": 2e-07, "loss": 0.3931, "step": 1558 }, { "clip_ratio/high_max": 0.00559506018544198, "clip_ratio/high_mean": 0.00154195248387623, "clip_ratio/low_mean": 0.003374817155417986, "clip_ratio/low_min": 6.49913345114328e-05, "clip_ratio/region_mean": 0.004916769670671783, "epoch": 0.1455063029009094, "grad_norm": 485039232.0, "learning_rate": 2e-07, "loss": 5249.3389, "step": 1559 }, { "clip_ratio/high_max": 0.00534536205304903, "clip_ratio/high_mean": 0.0014025170357854222, "clip_ratio/low_mean": 0.004518730434938334, "clip_ratio/low_min": 0.00019520846581144724, "clip_ratio/region_mean": 0.0059212476189713925, "epoch": 0.14559963600091, "grad_norm": 165.0186309814453, "learning_rate": 2e-07, "loss": 0.0762, "step": 1560 }, { "clip_ratio/high_max": 0.005722401670936961, "clip_ratio/high_mean": 0.0013543949880840955, "clip_ratio/low_mean": 0.002925886758021079, "clip_ratio/low_min": 2.0229810616001487e-05, "clip_ratio/region_mean": 0.004280281762476079, "epoch": 0.1456929691009106, "grad_norm": 210335.40625, "learning_rate": 2e-07, "loss": 5.861, "step": 1561 }, { "clip_ratio/high_max": 0.004633546512195608, "clip_ratio/high_mean": 0.001213361195368634, "clip_ratio/low_mean": 0.0038611189665971324, "clip_ratio/low_min": 0.00012450464055291377, "clip_ratio/region_mean": 0.005074480002804194, "epoch": 0.14578630220091116, "grad_norm": 4127.84619140625, "learning_rate": 2e-07, "loss": 0.2817, "step": 1562 }, { "clip_ratio/high_max": 0.0035272110671940027, "clip_ratio/high_mean": 0.001039553351347422, "clip_ratio/low_mean": 0.00428113408270292, "clip_ratio/low_min": 0.0002839450671672239, "clip_ratio/region_mean": 0.005320687203493435, "epoch": 0.14587963530091175, "grad_norm": 25929.2109375, "learning_rate": 2e-07, "loss": 1.1931, "step": 1563 }, { "clip_ratio/high_max": 0.005111567632411607, "clip_ratio/high_mean": 0.0012572412379086018, "clip_ratio/low_mean": 0.004145460217841901, "clip_ratio/low_min": 0.00026798166800290346, "clip_ratio/region_mean": 0.005402701324783266, "epoch": 0.14597296840091234, "grad_norm": 1335.1209716796875, "learning_rate": 2e-07, "loss": 0.1371, "step": 1564 }, { "clip_ratio/high_max": 0.004718204494565725, "clip_ratio/high_mean": 0.0013683755532838404, "clip_ratio/low_mean": 0.0036812634862144478, "clip_ratio/low_min": 3.937769724871032e-05, "clip_ratio/region_mean": 0.005049638952186797, "epoch": 0.1460663015009129, "grad_norm": 747700800.0, "learning_rate": 2e-07, "loss": 6094.103, "step": 1565 }, { "clip_ratio/high_max": 0.0054401040833909065, "clip_ratio/high_mean": 0.0014174259868013905, "clip_ratio/low_mean": 0.003600897718570195, "clip_ratio/low_min": 0.0001460823605157202, "clip_ratio/region_mean": 0.005018323776312172, "epoch": 0.1461596346009135, "grad_norm": 932.6537475585938, "learning_rate": 2e-07, "loss": 0.0837, "step": 1566 }, { "clip_ratio/high_max": 0.006658624064584728, "clip_ratio/high_mean": 0.0014899123980285367, "clip_ratio/low_mean": 0.003889949934091419, "clip_ratio/low_min": 0.00014982879474700894, "clip_ratio/region_mean": 0.005379862283007242, "epoch": 0.1462529677009141, "grad_norm": 124.15176391601562, "learning_rate": 2e-07, "loss": 0.0764, "step": 1567 }, { "clip_ratio/high_max": 0.00475490639655618, "clip_ratio/high_mean": 0.0013247423648863332, "clip_ratio/low_mean": 0.004170502514170948, "clip_ratio/low_min": 0.00012030243669869378, "clip_ratio/region_mean": 0.005495244840858504, "epoch": 0.14634630080091465, "grad_norm": 40511.26953125, "learning_rate": 2e-07, "loss": 0.6947, "step": 1568 }, { "clip_ratio/high_max": 0.005593403242528439, "clip_ratio/high_mean": 0.0013622679089166922, "clip_ratio/low_mean": 0.003660749163827859, "clip_ratio/low_min": 4.648140566132497e-05, "clip_ratio/region_mean": 0.005023017001803964, "epoch": 0.14643963390091524, "grad_norm": 163.91114807128906, "learning_rate": 2e-07, "loss": 0.0626, "step": 1569 }, { "clip_ratio/high_max": 0.004075331264175475, "clip_ratio/high_mean": 0.0011435385549702914, "clip_ratio/low_mean": 0.004240931251842994, "clip_ratio/low_min": 4.470250860322267e-05, "clip_ratio/region_mean": 0.005384469957789406, "epoch": 0.14653296700091584, "grad_norm": 13853.1025390625, "learning_rate": 2e-07, "loss": 1.2314, "step": 1570 }, { "clip_ratio/high_max": 0.005531830865947995, "clip_ratio/high_mean": 0.001437484595044225, "clip_ratio/low_mean": 0.0036498199769994244, "clip_ratio/low_min": 3.8118207157822326e-05, "clip_ratio/region_mean": 0.005087304627522826, "epoch": 0.1466263001009164, "grad_norm": 1689.6385498046875, "learning_rate": 2e-07, "loss": 0.1085, "step": 1571 }, { "clip_ratio/high_max": 0.004857852436543908, "clip_ratio/high_mean": 0.0013512089280993678, "clip_ratio/low_mean": 0.0038360027174348943, "clip_ratio/low_min": 0.00026019781216746196, "clip_ratio/region_mean": 0.005187211616430432, "epoch": 0.146719633200917, "grad_norm": 88033776.0, "learning_rate": 2e-07, "loss": 796.0383, "step": 1572 }, { "clip_ratio/high_max": 0.004646780667826533, "clip_ratio/high_mean": 0.001323175718425773, "clip_ratio/low_mean": 0.004232597209920641, "clip_ratio/low_min": 0.00047262019506888464, "clip_ratio/region_mean": 0.00555577281920705, "epoch": 0.14681296630091759, "grad_norm": 63.26753616333008, "learning_rate": 2e-07, "loss": 0.0899, "step": 1573 }, { "clip_ratio/high_max": 0.005728037140215747, "clip_ratio/high_mean": 0.0014619586891058134, "clip_ratio/low_mean": 0.004012962152046384, "clip_ratio/low_min": 0.00012834234530600952, "clip_ratio/region_mean": 0.00547492089390289, "epoch": 0.14690629940091818, "grad_norm": 280492.1875, "learning_rate": 2e-07, "loss": 8.8764, "step": 1574 }, { "clip_ratio/high_max": 0.0066244946865481324, "clip_ratio/high_mean": 0.0017325219796475722, "clip_ratio/low_mean": 0.003476656274870038, "clip_ratio/low_min": 0.00010099716746481135, "clip_ratio/region_mean": 0.0052091781835770234, "epoch": 0.14699963250091874, "grad_norm": 2692.86962890625, "learning_rate": 2e-07, "loss": 0.0937, "step": 1575 }, { "clip_ratio/high_max": 0.004328511364292353, "clip_ratio/high_mean": 0.001128164745750837, "clip_ratio/low_mean": 0.003012695971847279, "clip_ratio/low_min": 0.00012139391219534446, "clip_ratio/region_mean": 0.004140860750339925, "epoch": 0.14709296560091933, "grad_norm": 9919.4365234375, "learning_rate": 2e-07, "loss": 0.2977, "step": 1576 }, { "clip_ratio/high_max": 0.0042594378246576525, "clip_ratio/high_mean": 0.0012430160604708362, "clip_ratio/low_mean": 0.00315980378218228, "clip_ratio/low_min": 0.0003874116810038686, "clip_ratio/region_mean": 0.004402819919050671, "epoch": 0.14718629870091993, "grad_norm": 20244.82421875, "learning_rate": 2e-07, "loss": 0.7428, "step": 1577 }, { "clip_ratio/high_max": 0.004609750802046619, "clip_ratio/high_mean": 0.0013378753574215807, "clip_ratio/low_mean": 0.003870659704261925, "clip_ratio/low_min": 2.023308479692787e-05, "clip_ratio/region_mean": 0.005208535127167124, "epoch": 0.1472796318009205, "grad_norm": 24321.505859375, "learning_rate": 2e-07, "loss": 0.6691, "step": 1578 }, { "clip_ratio/high_max": 0.0065661854823702015, "clip_ratio/high_mean": 0.0017536837694933638, "clip_ratio/low_mean": 0.003323837951029418, "clip_ratio/low_min": 0.00021239481611701194, "clip_ratio/region_mean": 0.005077521709608845, "epoch": 0.14737296490092108, "grad_norm": 2095457.125, "learning_rate": 2e-07, "loss": 38.7576, "step": 1579 }, { "clip_ratio/high_max": 0.005795391174615361, "clip_ratio/high_mean": 0.0015046996595629025, "clip_ratio/low_mean": 0.003340660172398202, "clip_ratio/low_min": 0.0003773459047806682, "clip_ratio/region_mean": 0.004845359748287592, "epoch": 0.14746629800092168, "grad_norm": 4998.470703125, "learning_rate": 2e-07, "loss": 0.1908, "step": 1580 }, { "clip_ratio/high_max": 0.00371054661081871, "clip_ratio/high_mean": 0.0011066176084568724, "clip_ratio/low_mean": 0.00423023896291852, "clip_ratio/low_min": 6.965451029827818e-05, "clip_ratio/region_mean": 0.005336856687790714, "epoch": 0.14755963110092224, "grad_norm": 74389.8984375, "learning_rate": 2e-07, "loss": 5.8943, "step": 1581 }, { "clip_ratio/high_max": 0.004457714385353029, "clip_ratio/high_mean": 0.001305895759287523, "clip_ratio/low_mean": 0.0033970124713960104, "clip_ratio/low_min": 0.00012230996981088538, "clip_ratio/region_mean": 0.004702908307081088, "epoch": 0.14765296420092283, "grad_norm": 646.9983520507812, "learning_rate": 2e-07, "loss": 0.0924, "step": 1582 }, { "clip_ratio/high_max": 0.0046272432955447584, "clip_ratio/high_mean": 0.0012296808290557237, "clip_ratio/low_mean": 0.004426087674801238, "clip_ratio/low_min": 0.000152047964547819, "clip_ratio/region_mean": 0.005655768472934142, "epoch": 0.14774629730092342, "grad_norm": 368296.4375, "learning_rate": 2e-07, "loss": 3.9697, "step": 1583 }, { "clip_ratio/high_max": 0.005422829963208642, "clip_ratio/high_mean": 0.0016615514396107756, "clip_ratio/low_mean": 0.004016717757622246, "clip_ratio/low_min": 7.273552637343528e-05, "clip_ratio/region_mean": 0.005678269153577276, "epoch": 0.147839630400924, "grad_norm": 174.88726806640625, "learning_rate": 2e-07, "loss": 0.092, "step": 1584 }, { "clip_ratio/high_max": 0.005313081717758905, "clip_ratio/high_mean": 0.0016538888139621122, "clip_ratio/low_mean": 0.003769687384192366, "clip_ratio/low_min": 0.0002925292337749852, "clip_ratio/region_mean": 0.00542357619269751, "epoch": 0.14793296350092458, "grad_norm": 2833930.0, "learning_rate": 2e-07, "loss": 945.7839, "step": 1585 }, { "clip_ratio/high_max": 0.005131533398525789, "clip_ratio/high_mean": 0.0016438806324003963, "clip_ratio/low_mean": 0.0038448995910584927, "clip_ratio/low_min": 9.757194493431598e-05, "clip_ratio/region_mean": 0.005488780167070217, "epoch": 0.14802629660092517, "grad_norm": 19251790.0, "learning_rate": 2e-07, "loss": 374.1506, "step": 1586 }, { "clip_ratio/high_max": 0.007890586202847771, "clip_ratio/high_mean": 0.0019477940386423143, "clip_ratio/low_mean": 0.004134977894864278, "clip_ratio/low_min": 9.48214074014686e-05, "clip_ratio/region_mean": 0.0060827720299130306, "epoch": 0.14811962970092574, "grad_norm": 277349.71875, "learning_rate": 2e-07, "loss": 5.5691, "step": 1587 }, { "clip_ratio/high_max": 0.006817913948907517, "clip_ratio/high_mean": 0.0018671789966901997, "clip_ratio/low_mean": 0.00415114316274412, "clip_ratio/low_min": 0.00019127158157061785, "clip_ratio/region_mean": 0.006018322164891288, "epoch": 0.14821296280092633, "grad_norm": 5144287.0, "learning_rate": 2e-07, "loss": 191.1135, "step": 1588 }, { "clip_ratio/high_max": 0.008147741107677575, "clip_ratio/high_mean": 0.001904898937937105, "clip_ratio/low_mean": 0.00343523077754071, "clip_ratio/low_min": 7.04045087331906e-05, "clip_ratio/region_mean": 0.005340129624528345, "epoch": 0.14830629590092692, "grad_norm": 410224.40625, "learning_rate": 2e-07, "loss": 6.3965, "step": 1589 }, { "clip_ratio/high_max": 0.00803140282368986, "clip_ratio/high_mean": 0.0018679034219530877, "clip_ratio/low_mean": 0.0037766193636343814, "clip_ratio/low_min": 0.00022736227492714534, "clip_ratio/region_mean": 0.0056445228547090665, "epoch": 0.14839962900092749, "grad_norm": 14322665.0, "learning_rate": 2e-07, "loss": 96144.8359, "step": 1590 }, { "clip_ratio/high_max": 0.004955992611940019, "clip_ratio/high_mean": 0.0014597349036193918, "clip_ratio/low_mean": 0.0037004901314503513, "clip_ratio/low_min": 0.0002092050272040069, "clip_ratio/region_mean": 0.005160225096915383, "epoch": 0.14849296210092808, "grad_norm": 253693.3125, "learning_rate": 2e-07, "loss": 15.0933, "step": 1591 }, { "clip_ratio/high_max": 0.005251635680906475, "clip_ratio/high_mean": 0.0013144236245352658, "clip_ratio/low_mean": 0.004352452626335435, "clip_ratio/low_min": 0.00034142493223043857, "clip_ratio/region_mean": 0.0056668763209017925, "epoch": 0.14858629520092867, "grad_norm": 494.1700134277344, "learning_rate": 2e-07, "loss": 0.1271, "step": 1592 }, { "clip_ratio/high_max": 0.003936445957151591, "clip_ratio/high_mean": 0.0012080175761184364, "clip_ratio/low_mean": 0.004424463950272184, "clip_ratio/low_min": 0.0001345091450275504, "clip_ratio/region_mean": 0.005632481494103558, "epoch": 0.14867962830092923, "grad_norm": 82.83440399169922, "learning_rate": 2e-07, "loss": 0.0821, "step": 1593 }, { "clip_ratio/high_max": 0.004486677578825038, "clip_ratio/high_mean": 0.001299912815738935, "clip_ratio/low_mean": 0.002958880224468885, "clip_ratio/low_min": 0.00010370916788815521, "clip_ratio/region_mean": 0.004258793043845799, "epoch": 0.14877296140092983, "grad_norm": 458508.65625, "learning_rate": 2e-07, "loss": 16.2921, "step": 1594 }, { "clip_ratio/high_max": 0.005936224195465911, "clip_ratio/high_mean": 0.0014894535561325029, "clip_ratio/low_mean": 0.00339493814681191, "clip_ratio/low_min": 4.909662311547436e-05, "clip_ratio/region_mean": 0.004884391753876116, "epoch": 0.14886629450093042, "grad_norm": 6183336.5, "learning_rate": 2e-07, "loss": 149.469, "step": 1595 }, { "clip_ratio/high_max": 0.006086679321015254, "clip_ratio/high_mean": 0.0016151971140061505, "clip_ratio/low_mean": 0.00464912093593739, "clip_ratio/low_min": 0.00021799594833282754, "clip_ratio/region_mean": 0.006264318260946311, "epoch": 0.148959627600931, "grad_norm": 10580483.0, "learning_rate": 2e-07, "loss": 161.0538, "step": 1596 }, { "clip_ratio/high_max": 0.005749824020313099, "clip_ratio/high_mean": 0.0013815177690048586, "clip_ratio/low_mean": 0.00399625002319226, "clip_ratio/low_min": 0.00021896869657211937, "clip_ratio/region_mean": 0.005377767738536932, "epoch": 0.14905296070093157, "grad_norm": 2545.24951171875, "learning_rate": 2e-07, "loss": 0.1768, "step": 1597 }, { "clip_ratio/high_max": 0.006102879633544944, "clip_ratio/high_mean": 0.0018576995244075079, "clip_ratio/low_mean": 0.005006935891287867, "clip_ratio/low_min": 0.0004831906062463531, "clip_ratio/region_mean": 0.006864635230158456, "epoch": 0.14914629380093217, "grad_norm": 1132.4833984375, "learning_rate": 2e-07, "loss": 0.1417, "step": 1598 }, { "clip_ratio/high_max": 0.005940967083006399, "clip_ratio/high_mean": 0.001463068410885171, "clip_ratio/low_mean": 0.004041787993628532, "clip_ratio/low_min": 0.00042293403203075286, "clip_ratio/region_mean": 0.005504856526385993, "epoch": 0.14923962690093276, "grad_norm": 2489.97607421875, "learning_rate": 2e-07, "loss": 0.1601, "step": 1599 }, { "clip_ratio/high_max": 0.003541258272889536, "clip_ratio/high_mean": 0.0010232240401819581, "clip_ratio/low_mean": 0.004104691630345769, "clip_ratio/low_min": 0.0002154818139388226, "clip_ratio/region_mean": 0.005127915603225119, "epoch": 0.14933296000093332, "grad_norm": 256.2895812988281, "learning_rate": 2e-07, "loss": 0.1075, "step": 1600 }, { "epoch": 0.14933296000093332, "step": 1600, "total_flos": 0.0, "train_loss": 349.1145901117878, "train_runtime": 123659.1561, "train_samples_per_second": 11.593, "train_steps_per_second": 0.013 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 1170370290, "num_train_epochs": 1, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }