{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14933296000093332, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013724190848214302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 599.6725463867188, "completions/mean_terminated_length": 551.0205078125, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 9.333310000058333e-05, "grad_norm": 0.1351688802242279, "learning_rate": 1e-06, "loss": 0.0478, "num_tokens": 80197318.0, "reward": 0.4954223930835724, "reward_std": 0.26438432931900024, "rewards/simpleverify_reward/mean": 0.49542236328125, "rewards/simpleverify_reward/std": 0.49998122453689575, "step": 1 }, { "clip_ratio/high_max": 0.0023478127041016705, "clip_ratio/high_mean": 0.0011073373243561946, "clip_ratio/low_mean": 0.000518497255143302, "clip_ratio/low_min": 3.340999137435574e-05, "clip_ratio/region_mean": 0.0016258345349342562, "epoch": 0.00018666620000116666, "grad_norm": 0.12884752452373505, "learning_rate": 1e-06, "loss": -0.0299, "step": 2 }, { "clip_ratio/high_max": 0.0019141813027090393, "clip_ratio/high_mean": 0.0008050140222621849, "clip_ratio/low_mean": 0.0007772632998239715, "clip_ratio/low_min": 0.00011569632079044823, "clip_ratio/region_mean": 0.0015822773493709974, "epoch": 0.00027999930000175, "grad_norm": 0.10905759036540985, "learning_rate": 1e-06, "loss": 0.0585, "step": 3 }, { "clip_ratio/high_max": 0.0022400690213544294, "clip_ratio/high_mean": 0.0009223831257259008, "clip_ratio/low_mean": 0.0009029188040585723, "clip_ratio/low_min": 0.00013121906431479147, "clip_ratio/region_mean": 0.0018253018861287273, "epoch": 0.0003733324000023333, "grad_norm": 0.13122092187404633, "learning_rate": 1e-06, "loss": 0.0623, "step": 4 }, { "clip_ratio/high_max": 0.002449180894473102, "clip_ratio/high_mean": 0.0011378790113667492, "clip_ratio/low_mean": 0.0008680689552420517, "clip_ratio/low_min": 0.00010780939919641241, "clip_ratio/region_mean": 0.0020059479647898115, "epoch": 0.0004666655000029167, "grad_norm": 0.1299799531698227, "learning_rate": 1e-06, "loss": -0.0001, "step": 5 }, { "clip_ratio/high_max": 0.0027988183064735495, "clip_ratio/high_mean": 0.0012843796102970373, "clip_ratio/low_mean": 0.0013021820304857101, "clip_ratio/low_min": 0.00023420465731760487, "clip_ratio/region_mean": 0.002586561626230832, "epoch": 0.0005599986000035, "grad_norm": 0.13312117755413055, "learning_rate": 1e-06, "loss": -0.0133, "step": 6 }, { "clip_ratio/high_max": 0.002765578028629534, "clip_ratio/high_mean": 0.0011841880659630988, "clip_ratio/low_mean": 0.0013377274335653055, "clip_ratio/low_min": 0.0002463115406499128, "clip_ratio/region_mean": 0.002521915434044786, "epoch": 0.0006533317000040833, "grad_norm": 0.13714255392551422, "learning_rate": 1e-06, "loss": 0.038, "step": 7 }, { "clip_ratio/high_max": 0.00270877756702248, "clip_ratio/high_mean": 0.0012838548318541143, "clip_ratio/low_mean": 0.0012081002714694478, "clip_ratio/low_min": 0.00016637541557429358, "clip_ratio/region_mean": 0.002491955085133668, "epoch": 0.0007466648000046666, "grad_norm": 0.13007622957229614, "learning_rate": 1e-06, "loss": -0.0221, "step": 8 }, { "clip_ratio/high_max": 0.0024598598756711, "clip_ratio/high_mean": 0.001140238789957948, "clip_ratio/low_mean": 0.0014735562181158457, "clip_ratio/low_min": 8.273639014078071e-05, "clip_ratio/region_mean": 0.0026137950335396454, "epoch": 0.00083999790000525, "grad_norm": 0.12082390487194061, "learning_rate": 1e-06, "loss": 0.0555, "step": 9 }, { "clip_ratio/high_max": 0.002673635055543855, "clip_ratio/high_mean": 0.001122347173804883, "clip_ratio/low_mean": 0.0012391256059345324, "clip_ratio/low_min": 9.760110151546542e-05, "clip_ratio/region_mean": 0.0023614727542735636, "epoch": 0.0009333310000058334, "grad_norm": 0.12336327880620956, "learning_rate": 1e-06, "loss": -0.0016, "step": 10 }, { "clip_ratio/high_max": 0.002751210682617966, "clip_ratio/high_mean": 0.0011633767971943598, "clip_ratio/low_mean": 0.0012193960355944, "clip_ratio/low_min": 0.00019072842951572966, "clip_ratio/region_mean": 0.0023827728000469506, "epoch": 0.0010266641000064166, "grad_norm": 0.1197395846247673, "learning_rate": 1e-06, "loss": 0.0628, "step": 11 }, { "clip_ratio/high_max": 0.002759794377197977, "clip_ratio/high_mean": 0.0012307062424952164, "clip_ratio/low_mean": 0.001164543969935039, "clip_ratio/low_min": 0.00011344149243086576, "clip_ratio/region_mean": 0.002395250165136531, "epoch": 0.001119997200007, "grad_norm": 0.1405230462551117, "learning_rate": 1e-06, "loss": 0.0137, "step": 12 }, { "clip_ratio/high_max": 0.0023893758188933134, "clip_ratio/high_mean": 0.0011513511381053831, "clip_ratio/low_mean": 0.0011078872103098547, "clip_ratio/low_min": 0.00021513757565116975, "clip_ratio/region_mean": 0.002259238339320291, "epoch": 0.0012133303000075833, "grad_norm": 0.11387713998556137, "learning_rate": 1e-06, "loss": -0.0032, "step": 13 }, { "clip_ratio/high_max": 0.0024001618730835617, "clip_ratio/high_mean": 0.0010402810585219413, "clip_ratio/low_mean": 0.0011266607652942184, "clip_ratio/low_min": 0.00023190594765765127, "clip_ratio/region_mean": 0.0021669417328666896, "epoch": 0.0013066634000081666, "grad_norm": 0.11974268406629562, "learning_rate": 1e-06, "loss": 0.037, "step": 14 }, { "clip_ratio/high_max": 0.0024508071510354057, "clip_ratio/high_mean": 0.001112225349061191, "clip_ratio/low_mean": 0.0011875995078298729, "clip_ratio/low_min": 0.00025513387208775384, "clip_ratio/region_mean": 0.0022998248678050004, "epoch": 0.00139999650000875, "grad_norm": 0.12656979262828827, "learning_rate": 1e-06, "loss": 0.0348, "step": 15 }, { "clip_ratio/high_max": 0.002322030508366879, "clip_ratio/high_mean": 0.0010483957048563752, "clip_ratio/low_mean": 0.000957322492467938, "clip_ratio/low_min": 0.00015151336992857978, "clip_ratio/region_mean": 0.0020057182118762285, "epoch": 0.0014933296000093333, "grad_norm": 0.11600138247013092, "learning_rate": 1e-06, "loss": 0.0158, "step": 16 }, { "clip_ratio/high_max": 0.0026436500920681283, "clip_ratio/high_mean": 0.001141931970778387, "clip_ratio/low_mean": 0.0010170992245548405, "clip_ratio/low_min": 0.00010865482090594014, "clip_ratio/region_mean": 0.0021590311807813123, "epoch": 0.0015866627000099165, "grad_norm": 0.1152438074350357, "learning_rate": 1e-06, "loss": 0.0108, "step": 17 }, { "clip_ratio/high_max": 0.002515043524908833, "clip_ratio/high_mean": 0.0011454090708866715, "clip_ratio/low_mean": 0.0010353123507229611, "clip_ratio/low_min": 0.0001434843188690138, "clip_ratio/region_mean": 0.0021807214216096327, "epoch": 0.0016799958000105, "grad_norm": 0.12648127973079681, "learning_rate": 1e-06, "loss": -0.0031, "step": 18 }, { "clip_ratio/high_max": 0.002629626455018297, "clip_ratio/high_mean": 0.0010910109886026476, "clip_ratio/low_mean": 0.0010667181413737126, "clip_ratio/low_min": 0.0001797896029529511, "clip_ratio/region_mean": 0.0021577291627181694, "epoch": 0.0017733289000110833, "grad_norm": 0.11609940230846405, "learning_rate": 1e-06, "loss": -0.0109, "step": 19 }, { "clip_ratio/high_max": 0.0027949537980020978, "clip_ratio/high_mean": 0.0011497604682517704, "clip_ratio/low_mean": 0.0012121964937250596, "clip_ratio/low_min": 0.00023940318169479724, "clip_ratio/region_mean": 0.002361956925597042, "epoch": 0.0018666620000116667, "grad_norm": 0.12692219018936157, "learning_rate": 1e-06, "loss": 0.0344, "step": 20 }, { "clip_ratio/high_max": 0.002589399264252279, "clip_ratio/high_mean": 0.0011234683042857796, "clip_ratio/low_mean": 0.0010738386808952782, "clip_ratio/low_min": 0.00018489967715140665, "clip_ratio/region_mean": 0.002197307017922867, "epoch": 0.00195999510001225, "grad_norm": 0.13158082962036133, "learning_rate": 1e-06, "loss": 0.0449, "step": 21 }, { "clip_ratio/high_max": 0.0024172619087039493, "clip_ratio/high_mean": 0.0011121215211460367, "clip_ratio/low_mean": 0.0012496604249463417, "clip_ratio/low_min": 0.0001313423508690903, "clip_ratio/region_mean": 0.002361781931540463, "epoch": 0.002053328200012833, "grad_norm": 0.12087717652320862, "learning_rate": 1e-06, "loss": 0.0789, "step": 22 }, { "clip_ratio/high_max": 0.002466805191943422, "clip_ratio/high_mean": 0.0011839956532639917, "clip_ratio/low_mean": 0.001536189029138768, "clip_ratio/low_min": 0.00026916667411569506, "clip_ratio/region_mean": 0.0027201846896787174, "epoch": 0.0021466613000134167, "grad_norm": 0.12805938720703125, "learning_rate": 1e-06, "loss": 0.0351, "step": 23 }, { "clip_ratio/high_max": 0.002562341578595806, "clip_ratio/high_mean": 0.0011825717610918218, "clip_ratio/low_mean": 0.0013199982458900195, "clip_ratio/low_min": 0.00024161395504052052, "clip_ratio/region_mean": 0.002502569965145085, "epoch": 0.002239994400014, "grad_norm": 0.12345980107784271, "learning_rate": 1e-06, "loss": -0.0054, "step": 24 }, { "clip_ratio/high_max": 0.002660014637513086, "clip_ratio/high_mean": 0.001104355113056954, "clip_ratio/low_mean": 0.001319493276241701, "clip_ratio/low_min": 0.0001816941785364179, "clip_ratio/region_mean": 0.002423848389298655, "epoch": 0.002333327500014583, "grad_norm": 0.1219700425863266, "learning_rate": 1e-06, "loss": 0.0513, "step": 25 }, { "clip_ratio/high_max": 0.002305815323779825, "clip_ratio/high_mean": 0.00100868127810827, "clip_ratio/low_mean": 0.0013425168399407994, "clip_ratio/low_min": 0.00027819257229566574, "clip_ratio/region_mean": 0.002351198061660398, "epoch": 0.0024266606000151666, "grad_norm": 0.1232110783457756, "learning_rate": 1e-06, "loss": 0.0757, "step": 26 }, { "clip_ratio/high_max": 0.0026411455910420045, "clip_ratio/high_mean": 0.0010462454229127616, "clip_ratio/low_mean": 0.0014150662900647148, "clip_ratio/low_min": 0.0003456768763498985, "clip_ratio/region_mean": 0.0024613117275293916, "epoch": 0.00251999370001575, "grad_norm": 0.11845528334379196, "learning_rate": 1e-06, "loss": 0.0542, "step": 27 }, { "clip_ratio/high_max": 0.002325190529518295, "clip_ratio/high_mean": 0.0009594632647349499, "clip_ratio/low_mean": 0.0014218334035831504, "clip_ratio/low_min": 0.0002624542821649811, "clip_ratio/region_mean": 0.0023812966464902274, "epoch": 0.002613326800016333, "grad_norm": 0.11605717986822128, "learning_rate": 1e-06, "loss": 0.0625, "step": 28 }, { "clip_ratio/high_max": 0.0030251408825279213, "clip_ratio/high_mean": 0.0013527981900551822, "clip_ratio/low_mean": 0.001140159150963882, "clip_ratio/low_min": 0.00011908031501661753, "clip_ratio/region_mean": 0.0024929573337431066, "epoch": 0.0027066599000169166, "grad_norm": 0.12137246131896973, "learning_rate": 1e-06, "loss": -0.0538, "step": 29 }, { "clip_ratio/high_max": 0.0022979435307206586, "clip_ratio/high_mean": 0.0009442981317988597, "clip_ratio/low_mean": 0.0014302614436019212, "clip_ratio/low_min": 0.0002501899180060718, "clip_ratio/region_mean": 0.002374559626332484, "epoch": 0.0027999930000175, "grad_norm": 0.1159074679017067, "learning_rate": 1e-06, "loss": 0.0721, "step": 30 }, { "clip_ratio/high_max": 0.0025673703785287216, "clip_ratio/high_mean": 0.0012295757660467643, "clip_ratio/low_mean": 0.0013573341093433555, "clip_ratio/low_min": 0.00016855412104632705, "clip_ratio/region_mean": 0.0025869098681141622, "epoch": 0.002893326100018083, "grad_norm": 0.1223529651761055, "learning_rate": 1e-06, "loss": 0.0047, "step": 31 }, { "clip_ratio/high_max": 0.0025453735506744124, "clip_ratio/high_mean": 0.0011597320954024326, "clip_ratio/low_mean": 0.0013843528722645715, "clip_ratio/low_min": 0.00021935500444669742, "clip_ratio/region_mean": 0.002544085022236686, "epoch": 0.0029866592000186666, "grad_norm": 0.12275739014148712, "learning_rate": 1e-06, "loss": 0.0431, "step": 32 }, { "clip_ratio/high_max": 0.002729063700826373, "clip_ratio/high_mean": 0.0012258453971298877, "clip_ratio/low_mean": 0.001404339254804654, "clip_ratio/low_min": 0.0003080388714806759, "clip_ratio/region_mean": 0.0026301846155547537, "epoch": 0.00307999230001925, "grad_norm": 0.13245970010757446, "learning_rate": 1e-06, "loss": 0.0388, "step": 33 }, { "clip_ratio/high_max": 0.0025372079908265732, "clip_ratio/high_mean": 0.001119392465625424, "clip_ratio/low_mean": 0.0013647336709254887, "clip_ratio/low_min": 0.00018539652683102759, "clip_ratio/region_mean": 0.0024841261983965524, "epoch": 0.003173325400019833, "grad_norm": 0.18169943988323212, "learning_rate": 1e-06, "loss": 0.0276, "step": 34 }, { "clip_ratio/high_max": 0.002517153181543108, "clip_ratio/high_mean": 0.001101550049497746, "clip_ratio/low_mean": 0.001335564760665875, "clip_ratio/low_min": 0.0001049614775183727, "clip_ratio/region_mean": 0.002437114824715536, "epoch": 0.0032666585000204165, "grad_norm": 0.12524020671844482, "learning_rate": 1e-06, "loss": -0.0019, "step": 35 }, { "clip_ratio/high_max": 0.0025958751866710372, "clip_ratio/high_mean": 0.0011561296778381802, "clip_ratio/low_mean": 0.0013680337942787446, "clip_ratio/low_min": 0.00021737143106292933, "clip_ratio/region_mean": 0.0025241635012207553, "epoch": 0.003359991600021, "grad_norm": 0.12481582909822464, "learning_rate": 1e-06, "loss": 0.0143, "step": 36 }, { "clip_ratio/high_max": 0.0025699134785099886, "clip_ratio/high_mean": 0.001172711308754515, "clip_ratio/low_mean": 0.0012586960001499392, "clip_ratio/low_min": 0.0003111901296506403, "clip_ratio/region_mean": 0.0024314072798006237, "epoch": 0.0034533247000215835, "grad_norm": 0.12213467806577682, "learning_rate": 1e-06, "loss": 0.0051, "step": 37 }, { "clip_ratio/high_max": 0.0026844480526051484, "clip_ratio/high_mean": 0.0012004291784251109, "clip_ratio/low_mean": 0.0011842669009638485, "clip_ratio/low_min": 0.00024278982982650632, "clip_ratio/region_mean": 0.0023846960903028958, "epoch": 0.0035466578000221665, "grad_norm": 0.11984220147132874, "learning_rate": 1e-06, "loss": -0.0045, "step": 38 }, { "clip_ratio/high_max": 0.0027357147337170318, "clip_ratio/high_mean": 0.001222223239892628, "clip_ratio/low_mean": 0.0014722723135491833, "clip_ratio/low_min": 0.0002249274493806297, "clip_ratio/region_mean": 0.0026944955898215994, "epoch": 0.00363999090002275, "grad_norm": 0.1276833415031433, "learning_rate": 1e-06, "loss": 0.0606, "step": 39 }, { "clip_ratio/high_max": 0.0024828402092680335, "clip_ratio/high_mean": 0.0012137264875491383, "clip_ratio/low_mean": 0.001327302736171987, "clip_ratio/low_min": 0.000284347188880929, "clip_ratio/region_mean": 0.002541029251005966, "epoch": 0.0037333240000233334, "grad_norm": 0.11976026743650436, "learning_rate": 1e-06, "loss": 0.011, "step": 40 }, { "clip_ratio/high_max": 0.002808928315062076, "clip_ratio/high_mean": 0.0012745399326377083, "clip_ratio/low_mean": 0.001354664233076619, "clip_ratio/low_min": 0.00014309158814285183, "clip_ratio/region_mean": 0.002629204209370073, "epoch": 0.0038266571000239165, "grad_norm": 0.12221518903970718, "learning_rate": 1e-06, "loss": 0.0068, "step": 41 }, { "clip_ratio/high_max": 0.0030057493568165228, "clip_ratio/high_mean": 0.0014087753152125515, "clip_ratio/low_mean": 0.0011849932416225784, "clip_ratio/low_min": 0.0001294406010856619, "clip_ratio/region_mean": 0.0025937685859389603, "epoch": 0.0039199902000245, "grad_norm": 0.12715640664100647, "learning_rate": 1e-06, "loss": -0.0756, "step": 42 }, { "clip_ratio/high_max": 0.0026467816132935695, "clip_ratio/high_mean": 0.0011393803033570293, "clip_ratio/low_mean": 0.0012746798092848621, "clip_ratio/low_min": 0.0002086070753648528, "clip_ratio/region_mean": 0.002414060159935616, "epoch": 0.004013323300025083, "grad_norm": 0.1203455999493599, "learning_rate": 1e-06, "loss": 0.0226, "step": 43 }, { "clip_ratio/high_max": 0.002541160887631122, "clip_ratio/high_mean": 0.0012985061184735969, "clip_ratio/low_mean": 0.0012353502861515153, "clip_ratio/low_min": 0.00020070204118383117, "clip_ratio/region_mean": 0.0025338564228150062, "epoch": 0.004106656400025666, "grad_norm": 0.12440450489521027, "learning_rate": 1e-06, "loss": 0.0369, "step": 44 }, { "clip_ratio/high_max": 0.0027139170488226227, "clip_ratio/high_mean": 0.0011793430185207399, "clip_ratio/low_mean": 0.001420053582478431, "clip_ratio/low_min": 0.00012604999665200012, "clip_ratio/region_mean": 0.0025993965755333193, "epoch": 0.00419998950002625, "grad_norm": 0.1202644407749176, "learning_rate": 1e-06, "loss": 0.0656, "step": 45 }, { "clip_ratio/high_max": 0.002754934292170219, "clip_ratio/high_mean": 0.0012010007740173023, "clip_ratio/low_mean": 0.0014191860100254416, "clip_ratio/low_min": 0.0002793925368678174, "clip_ratio/region_mean": 0.0026201868167845532, "epoch": 0.004293322600026833, "grad_norm": 0.11849617958068848, "learning_rate": 1e-06, "loss": -0.0504, "step": 46 }, { "clip_ratio/high_max": 0.0024550102098146453, "clip_ratio/high_mean": 0.0010567437020654324, "clip_ratio/low_mean": 0.0014334456500364468, "clip_ratio/low_min": 0.00017170624414575286, "clip_ratio/region_mean": 0.0024901893120841123, "epoch": 0.004386655700027417, "grad_norm": 0.11086788028478622, "learning_rate": 1e-06, "loss": 0.0513, "step": 47 }, { "clip_ratio/high_max": 0.002958431141451001, "clip_ratio/high_mean": 0.0012115780664316844, "clip_ratio/low_mean": 0.0012065150658600032, "clip_ratio/low_min": 0.0001335978704446461, "clip_ratio/region_mean": 0.002418093128653709, "epoch": 0.004479988800028, "grad_norm": 0.1252577304840088, "learning_rate": 1e-06, "loss": 0.0062, "step": 48 }, { "clip_ratio/high_max": 0.0022706098461640067, "clip_ratio/high_mean": 0.0010665518675523344, "clip_ratio/low_mean": 0.0014747400309715886, "clip_ratio/low_min": 0.00020870684238616377, "clip_ratio/region_mean": 0.0025412919057998806, "epoch": 0.004573321900028583, "grad_norm": 0.10884135961532593, "learning_rate": 1e-06, "loss": 0.0236, "step": 49 }, { "clip_ratio/high_max": 0.002185671604820527, "clip_ratio/high_mean": 0.001060843529558042, "clip_ratio/low_mean": 0.0013342364218260627, "clip_ratio/low_min": 0.00022071847206461825, "clip_ratio/region_mean": 0.00239507996593602, "epoch": 0.004666655000029166, "grad_norm": 0.12361767143011093, "learning_rate": 1e-06, "loss": 0.0313, "step": 50 }, { "clip_ratio/high_max": 0.0027597283042268828, "clip_ratio/high_mean": 0.0011761163605115144, "clip_ratio/low_mean": 0.001213630408528843, "clip_ratio/low_min": 0.0001908261219796259, "clip_ratio/region_mean": 0.002389746812696103, "epoch": 0.00475998810002975, "grad_norm": 0.11355671286582947, "learning_rate": 1e-06, "loss": 0.0207, "step": 51 }, { "clip_ratio/high_max": 0.0024905576574383304, "clip_ratio/high_mean": 0.0011331064633850474, "clip_ratio/low_mean": 0.0013893898321839515, "clip_ratio/low_min": 0.0003023253630090039, "clip_ratio/region_mean": 0.0025224962664651684, "epoch": 0.004853321200030333, "grad_norm": 0.12120325118303299, "learning_rate": 1e-06, "loss": 0.0473, "step": 52 }, { "clip_ratio/high_max": 0.002222864786745049, "clip_ratio/high_mean": 0.0011238621809752658, "clip_ratio/low_mean": 0.0012951597236678936, "clip_ratio/low_min": 0.0002244625247840304, "clip_ratio/region_mean": 0.002419021948298905, "epoch": 0.004946654300030917, "grad_norm": 0.11711551249027252, "learning_rate": 1e-06, "loss": -0.004, "step": 53 }, { "clip_ratio/high_max": 0.002689632492547389, "clip_ratio/high_mean": 0.0012400245395838283, "clip_ratio/low_mean": 0.0014652921454398893, "clip_ratio/low_min": 0.00025164604903693544, "clip_ratio/region_mean": 0.002705316714127548, "epoch": 0.0050399874000315, "grad_norm": 0.12448856979608536, "learning_rate": 1e-06, "loss": 0.0183, "step": 54 }, { "clip_ratio/high_max": 0.0023022337845759466, "clip_ratio/high_mean": 0.0011531529162311926, "clip_ratio/low_mean": 0.001143354749729042, "clip_ratio/low_min": 0.0002030430341619649, "clip_ratio/region_mean": 0.0022965076423133723, "epoch": 0.005133320500032083, "grad_norm": 0.11327353119850159, "learning_rate": 1e-06, "loss": 0.0141, "step": 55 }, { "clip_ratio/high_max": 0.0029010622238274664, "clip_ratio/high_mean": 0.0012158983372501098, "clip_ratio/low_mean": 0.0012990137256565504, "clip_ratio/low_min": 0.0001827138557928265, "clip_ratio/region_mean": 0.002514912099286448, "epoch": 0.005226653600032666, "grad_norm": 0.13181783258914948, "learning_rate": 1e-06, "loss": 0.0205, "step": 56 }, { "clip_ratio/high_max": 0.0027543388096091803, "clip_ratio/high_mean": 0.0011148851735924836, "clip_ratio/low_mean": 0.0013057968244538642, "clip_ratio/low_min": 0.00020840396882704226, "clip_ratio/region_mean": 0.002420682030788157, "epoch": 0.00531998670003325, "grad_norm": 0.12502296268939972, "learning_rate": 1e-06, "loss": 0.0735, "step": 57 }, { "clip_ratio/high_max": 0.0027627505696727894, "clip_ratio/high_mean": 0.0012729899899568409, "clip_ratio/low_mean": 0.0013732074221479706, "clip_ratio/low_min": 0.00019152652657794533, "clip_ratio/region_mean": 0.002646197346621193, "epoch": 0.005413319800033833, "grad_norm": 0.12117017805576324, "learning_rate": 1e-06, "loss": 0.038, "step": 58 }, { "clip_ratio/high_max": 0.0026648359271348454, "clip_ratio/high_mean": 0.0012008250414510258, "clip_ratio/low_mean": 0.0013067040526948404, "clip_ratio/low_min": 0.00021925389592070132, "clip_ratio/region_mean": 0.002507529075955972, "epoch": 0.005506652900034417, "grad_norm": 0.13083091378211975, "learning_rate": 1e-06, "loss": -0.0015, "step": 59 }, { "clip_ratio/high_max": 0.0023332468190346844, "clip_ratio/high_mean": 0.0011102884272986557, "clip_ratio/low_mean": 0.0014840043040749151, "clip_ratio/low_min": 0.0001599972110852832, "clip_ratio/region_mean": 0.002594292745925486, "epoch": 0.005599986000035, "grad_norm": 0.11433768272399902, "learning_rate": 1e-06, "loss": 0.005, "step": 60 }, { "clip_ratio/high_max": 0.0028035322538926266, "clip_ratio/high_mean": 0.0011521758897288237, "clip_ratio/low_mean": 0.0013908127111790236, "clip_ratio/low_min": 0.0002726918983171345, "clip_ratio/region_mean": 0.0025429886154597625, "epoch": 0.005693319100035584, "grad_norm": 0.11670524626970291, "learning_rate": 1e-06, "loss": 0.0281, "step": 61 }, { "clip_ratio/high_max": 0.002536535459512379, "clip_ratio/high_mean": 0.00121431296429364, "clip_ratio/low_mean": 0.0012722949650196824, "clip_ratio/low_min": 0.00022249042103794636, "clip_ratio/region_mean": 0.0024866079329513013, "epoch": 0.005786652200036166, "grad_norm": 0.12424971908330917, "learning_rate": 1e-06, "loss": 0.019, "step": 62 }, { "clip_ratio/high_max": 0.0025815168119152077, "clip_ratio/high_mean": 0.0012384331566863693, "clip_ratio/low_mean": 0.0012156461416452657, "clip_ratio/low_min": 0.00012347833853709744, "clip_ratio/region_mean": 0.002454079338349402, "epoch": 0.00587998530003675, "grad_norm": 0.10702453553676605, "learning_rate": 1e-06, "loss": -0.0233, "step": 63 }, { "clip_ratio/high_max": 0.0031096722559595946, "clip_ratio/high_mean": 0.0011724801315722289, "clip_ratio/low_mean": 0.0015286608031601645, "clip_ratio/low_min": 0.0003675774805742549, "clip_ratio/region_mean": 0.0027011409183614887, "epoch": 0.005973318400037333, "grad_norm": 0.11808144301176071, "learning_rate": 1e-06, "loss": 0.0507, "step": 64 }, { "clip_ratio/high_max": 0.002826274634571746, "clip_ratio/high_mean": 0.0011323906401230488, "clip_ratio/low_mean": 0.0016584002878516912, "clip_ratio/low_min": 0.00033293380329268984, "clip_ratio/region_mean": 0.002790790909784846, "epoch": 0.006066651500037917, "grad_norm": 0.11598782241344452, "learning_rate": 1e-06, "loss": 0.0458, "step": 65 }, { "clip_ratio/high_max": 0.0024665794771863148, "clip_ratio/high_mean": 0.001183727457828354, "clip_ratio/low_mean": 0.001549983357108431, "clip_ratio/low_min": 0.00017830143406172283, "clip_ratio/region_mean": 0.0027337107458151877, "epoch": 0.0061599846000385, "grad_norm": 0.11802730709314346, "learning_rate": 1e-06, "loss": 0.0703, "step": 66 }, { "clip_ratio/high_max": 0.0034556466853246093, "clip_ratio/high_mean": 0.0015078558899404015, "clip_ratio/low_mean": 0.0015664736711187288, "clip_ratio/low_min": 0.00029567185265477747, "clip_ratio/region_mean": 0.003074329550145194, "epoch": 0.0062533177000390835, "grad_norm": 0.12435410171747208, "learning_rate": 1e-06, "loss": 0.0157, "step": 67 }, { "clip_ratio/high_max": 0.002682604972505942, "clip_ratio/high_mean": 0.0011244208089919994, "clip_ratio/low_mean": 0.0014367879375640769, "clip_ratio/low_min": 0.00015121962132980116, "clip_ratio/region_mean": 0.0025612086901674047, "epoch": 0.006346650800039666, "grad_norm": 0.1238340437412262, "learning_rate": 1e-06, "loss": 0.0282, "step": 68 }, { "clip_ratio/high_max": 0.0028364117169985548, "clip_ratio/high_mean": 0.0012174231796961976, "clip_ratio/low_mean": 0.0014808492487645708, "clip_ratio/low_min": 0.0003865883045364171, "clip_ratio/region_mean": 0.002698272481211461, "epoch": 0.00643998390004025, "grad_norm": 0.1309591382741928, "learning_rate": 1e-06, "loss": -0.0123, "step": 69 }, { "clip_ratio/high_max": 0.0028682243209914304, "clip_ratio/high_mean": 0.00127961009820865, "clip_ratio/low_mean": 0.0013721636423724703, "clip_ratio/low_min": 0.00014195374387782067, "clip_ratio/region_mean": 0.0026517737496760674, "epoch": 0.006533317000040833, "grad_norm": 0.12093537300825119, "learning_rate": 1e-06, "loss": -0.0167, "step": 70 }, { "clip_ratio/high_max": 0.0026145253286813386, "clip_ratio/high_mean": 0.0011623787504504435, "clip_ratio/low_mean": 0.0013004832762817387, "clip_ratio/low_min": 0.00023329568102781195, "clip_ratio/region_mean": 0.002462862044922076, "epoch": 0.0066266501000414165, "grad_norm": 0.11307942122220993, "learning_rate": 1e-06, "loss": 0.0217, "step": 71 }, { "clip_ratio/high_max": 0.0029586045056930743, "clip_ratio/high_mean": 0.001300655978411669, "clip_ratio/low_mean": 0.0012729625268548261, "clip_ratio/low_min": 0.00015535668353550136, "clip_ratio/region_mean": 0.002573618483438622, "epoch": 0.006719983200042, "grad_norm": 0.1201862320303917, "learning_rate": 1e-06, "loss": 0.0153, "step": 72 }, { "clip_ratio/high_max": 0.0025655183562776074, "clip_ratio/high_mean": 0.0011379388015484437, "clip_ratio/low_mean": 0.0013663674253621139, "clip_ratio/low_min": 0.00034249923373863567, "clip_ratio/region_mean": 0.002504306197806727, "epoch": 0.0068133163000425835, "grad_norm": 0.12244287133216858, "learning_rate": 1e-06, "loss": 0.0368, "step": 73 }, { "clip_ratio/high_max": 0.0027429223846411332, "clip_ratio/high_mean": 0.0010778947798826266, "clip_ratio/low_mean": 0.0013019870566495229, "clip_ratio/low_min": 0.00017331588605884463, "clip_ratio/region_mean": 0.00237988186563598, "epoch": 0.006906649400043167, "grad_norm": 0.11468847841024399, "learning_rate": 1e-06, "loss": 0.0001, "step": 74 }, { "clip_ratio/high_max": 0.0026020435980171897, "clip_ratio/high_mean": 0.0012703805841738358, "clip_ratio/low_mean": 0.0014648977921751793, "clip_ratio/low_min": 0.0001831877998483833, "clip_ratio/region_mean": 0.002735278438194655, "epoch": 0.0069999825000437495, "grad_norm": 0.11755255609750748, "learning_rate": 1e-06, "loss": 0.0189, "step": 75 }, { "clip_ratio/high_max": 0.002817681059241295, "clip_ratio/high_mean": 0.0013301539620442782, "clip_ratio/low_mean": 0.0013364174410526175, "clip_ratio/low_min": 0.00011838049067591783, "clip_ratio/region_mean": 0.0026665713521651924, "epoch": 0.007093315600044333, "grad_norm": 0.11992045491933823, "learning_rate": 1e-06, "loss": -0.0544, "step": 76 }, { "clip_ratio/high_max": 0.002334528460778529, "clip_ratio/high_mean": 0.0011476118379505351, "clip_ratio/low_mean": 0.0015280926272680517, "clip_ratio/low_min": 0.00030930779666960007, "clip_ratio/region_mean": 0.0026757044688565657, "epoch": 0.0071866487000449165, "grad_norm": 0.12068290263414383, "learning_rate": 1e-06, "loss": 0.0294, "step": 77 }, { "clip_ratio/high_max": 0.0027745606494136155, "clip_ratio/high_mean": 0.0012665794092754368, "clip_ratio/low_mean": 0.0013899393161409535, "clip_ratio/low_min": 0.0002143612400686834, "clip_ratio/region_mean": 0.0026565187436062843, "epoch": 0.0072799818000455, "grad_norm": 0.11019036918878555, "learning_rate": 1e-06, "loss": 0.0042, "step": 78 }, { "clip_ratio/high_max": 0.002435784030240029, "clip_ratio/high_mean": 0.0012236549646331696, "clip_ratio/low_mean": 0.0014473684277618304, "clip_ratio/low_min": 0.00024480762249368127, "clip_ratio/region_mean": 0.0026710233869380318, "epoch": 0.007373314900046083, "grad_norm": 0.12387555092573166, "learning_rate": 1e-06, "loss": -0.0001, "step": 79 }, { "clip_ratio/high_max": 0.002483940726961009, "clip_ratio/high_mean": 0.0010340837689000182, "clip_ratio/low_mean": 0.0013392387554631568, "clip_ratio/low_min": 0.00023337047696259106, "clip_ratio/region_mean": 0.002373322487983387, "epoch": 0.007466648000046667, "grad_norm": 0.11911188066005707, "learning_rate": 1e-06, "loss": -0.0016, "step": 80 }, { "clip_ratio/high_max": 0.002534215891500935, "clip_ratio/high_mean": 0.001200334412715165, "clip_ratio/low_mean": 0.001505252341303276, "clip_ratio/low_min": 0.0003896178604918532, "clip_ratio/region_mean": 0.0027055867831222713, "epoch": 0.0075599811000472495, "grad_norm": 0.12401816993951797, "learning_rate": 1e-06, "loss": 0.0511, "step": 81 }, { "clip_ratio/high_max": 0.0027460782512207516, "clip_ratio/high_mean": 0.001146390273788711, "clip_ratio/low_mean": 0.0013218955646152608, "clip_ratio/low_min": 0.0001522299744465272, "clip_ratio/region_mean": 0.0024682858420419507, "epoch": 0.007653314200047833, "grad_norm": 0.1271621733903885, "learning_rate": 1e-06, "loss": -0.0355, "step": 82 }, { "clip_ratio/high_max": 0.0027357460858183913, "clip_ratio/high_mean": 0.0012101973879907746, "clip_ratio/low_mean": 0.0015687971099396236, "clip_ratio/low_min": 0.0002497866480553057, "clip_ratio/region_mean": 0.0027789945161202922, "epoch": 0.007746647300048416, "grad_norm": 0.12573960423469543, "learning_rate": 1e-06, "loss": 0.0232, "step": 83 }, { "clip_ratio/high_max": 0.0027602964473771863, "clip_ratio/high_mean": 0.0014019418449606746, "clip_ratio/low_mean": 0.0015700076110078953, "clip_ratio/low_min": 0.000192812726709235, "clip_ratio/region_mean": 0.0029719493904849514, "epoch": 0.007839980400049, "grad_norm": 0.12834490835666656, "learning_rate": 1e-06, "loss": -0.0236, "step": 84 }, { "clip_ratio/high_max": 0.002873798403015826, "clip_ratio/high_mean": 0.0012417943034961354, "clip_ratio/low_mean": 0.0014508737658616155, "clip_ratio/low_min": 0.00036596354493667604, "clip_ratio/region_mean": 0.002692668109375518, "epoch": 0.007933313500049582, "grad_norm": 0.12216285616159439, "learning_rate": 1e-06, "loss": -0.0186, "step": 85 }, { "clip_ratio/high_max": 0.0031415919147548266, "clip_ratio/high_mean": 0.0013476370768330526, "clip_ratio/low_mean": 0.001391533442074433, "clip_ratio/low_min": 0.0001743201992212562, "clip_ratio/region_mean": 0.0027391705225454643, "epoch": 0.008026646600050166, "grad_norm": 0.12674564123153687, "learning_rate": 1e-06, "loss": -0.0234, "step": 86 }, { "clip_ratio/high_max": 0.0026021780795417726, "clip_ratio/high_mean": 0.0011719479552994017, "clip_ratio/low_mean": 0.0015808066455065273, "clip_ratio/low_min": 0.00028470718189055333, "clip_ratio/region_mean": 0.0027527546044439077, "epoch": 0.00811997970005075, "grad_norm": 0.12727433443069458, "learning_rate": 1e-06, "loss": 0.0085, "step": 87 }, { "clip_ratio/high_max": 0.0026504448251216672, "clip_ratio/high_mean": 0.0013693557848455384, "clip_ratio/low_mean": 0.0014677377839689143, "clip_ratio/low_min": 0.00018616943725646706, "clip_ratio/region_mean": 0.002837093561538495, "epoch": 0.008213312800051333, "grad_norm": 0.12803950905799866, "learning_rate": 1e-06, "loss": -0.0057, "step": 88 }, { "clip_ratio/high_max": 0.0026206430775346234, "clip_ratio/high_mean": 0.0012244423414813355, "clip_ratio/low_mean": 0.001532272457552608, "clip_ratio/low_min": 0.00022172410626808414, "clip_ratio/region_mean": 0.0027567148572416045, "epoch": 0.008306645900051916, "grad_norm": 0.11617273092269897, "learning_rate": 1e-06, "loss": 0.0314, "step": 89 }, { "clip_ratio/high_max": 0.002430669810564723, "clip_ratio/high_mean": 0.0010987007117364556, "clip_ratio/low_mean": 0.0014864155782561284, "clip_ratio/low_min": 0.00034380300076009007, "clip_ratio/region_mean": 0.0025851162936305627, "epoch": 0.0083999790000525, "grad_norm": 0.11640790104866028, "learning_rate": 1e-06, "loss": 0.062, "step": 90 }, { "clip_ratio/high_max": 0.0027008131437469274, "clip_ratio/high_mean": 0.0012744264058710542, "clip_ratio/low_mean": 0.0014847731108602602, "clip_ratio/low_min": 0.00027809551465907134, "clip_ratio/region_mean": 0.0027591994585236534, "epoch": 0.008493312100053083, "grad_norm": 0.12253836542367935, "learning_rate": 1e-06, "loss": 0.0247, "step": 91 }, { "clip_ratio/high_max": 0.0030946138067520224, "clip_ratio/high_mean": 0.0012652283439820167, "clip_ratio/low_mean": 0.0015399536132463254, "clip_ratio/low_min": 0.0002860111262634746, "clip_ratio/region_mean": 0.0028051819099346176, "epoch": 0.008586645200053667, "grad_norm": 0.1208665519952774, "learning_rate": 1e-06, "loss": -0.0231, "step": 92 }, { "clip_ratio/high_max": 0.0029941294342279434, "clip_ratio/high_mean": 0.0013417743321042508, "clip_ratio/low_mean": 0.001346117518551182, "clip_ratio/low_min": 0.00023732907084195176, "clip_ratio/region_mean": 0.0026878918142756447, "epoch": 0.00867997830005425, "grad_norm": 0.1292804777622223, "learning_rate": 1e-06, "loss": 0.0164, "step": 93 }, { "clip_ratio/high_max": 0.0029322607879294083, "clip_ratio/high_mean": 0.0012803628378605936, "clip_ratio/low_mean": 0.0016392632605857216, "clip_ratio/low_min": 0.0003152219724142924, "clip_ratio/region_mean": 0.0029196261311881244, "epoch": 0.008773311400054834, "grad_norm": 0.1228199377655983, "learning_rate": 1e-06, "loss": 0.0645, "step": 94 }, { "clip_ratio/high_max": 0.0025236464352929033, "clip_ratio/high_mean": 0.0012460195575840771, "clip_ratio/low_mean": 0.001547534189739963, "clip_ratio/low_min": 0.0002313568111276254, "clip_ratio/region_mean": 0.002793553765513934, "epoch": 0.008866644500055417, "grad_norm": 0.11896580457687378, "learning_rate": 1e-06, "loss": 0.023, "step": 95 }, { "clip_ratio/high_max": 0.0024031530047068372, "clip_ratio/high_mean": 0.0011397364978620317, "clip_ratio/low_mean": 0.001470630151743535, "clip_ratio/low_min": 0.0001873053420240467, "clip_ratio/region_mean": 0.002610366602311842, "epoch": 0.008959977600056, "grad_norm": 0.11692328751087189, "learning_rate": 1e-06, "loss": 0.0027, "step": 96 }, { "clip_ratio/high_max": 0.002619394777866546, "clip_ratio/high_mean": 0.001199335394630907, "clip_ratio/low_mean": 0.0015835727390367538, "clip_ratio/low_min": 0.0002894433882829617, "clip_ratio/region_mean": 0.0027829081373056397, "epoch": 0.009053310700056582, "grad_norm": 0.12827742099761963, "learning_rate": 1e-06, "loss": 0.0461, "step": 97 }, { "clip_ratio/high_max": 0.0026653434615582228, "clip_ratio/high_mean": 0.0012437725854397286, "clip_ratio/low_mean": 0.001574280428030761, "clip_ratio/low_min": 0.0002538182880016393, "clip_ratio/region_mean": 0.002818052984366659, "epoch": 0.009146643800057166, "grad_norm": 0.1247875839471817, "learning_rate": 1e-06, "loss": 0.0214, "step": 98 }, { "clip_ratio/high_max": 0.002603532724606339, "clip_ratio/high_mean": 0.0012048694479744881, "clip_ratio/low_mean": 0.001481337756558787, "clip_ratio/low_min": 0.0001852586910899845, "clip_ratio/region_mean": 0.002686207204533275, "epoch": 0.00923997690005775, "grad_norm": 0.11257535964250565, "learning_rate": 1e-06, "loss": 0.0077, "step": 99 }, { "clip_ratio/high_max": 0.002924049971625209, "clip_ratio/high_mean": 0.0012575002801895607, "clip_ratio/low_mean": 0.0015386971317639109, "clip_ratio/low_min": 0.00033075985629693605, "clip_ratio/region_mean": 0.0027961974337813444, "epoch": 0.009333310000058333, "grad_norm": 0.12220936268568039, "learning_rate": 1e-06, "loss": 0.0352, "step": 100 }, { "clip_ratio/high_max": 0.0028391593587002717, "clip_ratio/high_mean": 0.0012918183238070924, "clip_ratio/low_mean": 0.0013782262103632092, "clip_ratio/low_min": 0.0001240499914274551, "clip_ratio/region_mean": 0.002670044530532323, "epoch": 0.009426643100058916, "grad_norm": 0.12901794910430908, "learning_rate": 1e-06, "loss": -0.0133, "step": 101 }, { "clip_ratio/high_max": 0.002537941516493447, "clip_ratio/high_mean": 0.0013183612245484255, "clip_ratio/low_mean": 0.0013246296730358154, "clip_ratio/low_min": 0.00013077422318019671, "clip_ratio/region_mean": 0.002642990875756368, "epoch": 0.0095199762000595, "grad_norm": 0.1298808455467224, "learning_rate": 1e-06, "loss": -0.0233, "step": 102 }, { "clip_ratio/high_max": 0.0031578919224557467, "clip_ratio/high_mean": 0.001373731909552589, "clip_ratio/low_mean": 0.0014723148960911203, "clip_ratio/low_min": 0.00019456338122836314, "clip_ratio/region_mean": 0.0028460468456614763, "epoch": 0.009613309300060083, "grad_norm": 0.11997205764055252, "learning_rate": 1e-06, "loss": 0.0065, "step": 103 }, { "clip_ratio/high_max": 0.002865104914235417, "clip_ratio/high_mean": 0.0012580548718688078, "clip_ratio/low_mean": 0.0014033523948455695, "clip_ratio/low_min": 0.0004038890856463695, "clip_ratio/region_mean": 0.0026614072121446952, "epoch": 0.009706642400060667, "grad_norm": 0.12166479229927063, "learning_rate": 1e-06, "loss": 0.0303, "step": 104 }, { "clip_ratio/high_max": 0.0024868289765436202, "clip_ratio/high_mean": 0.001170450708741555, "clip_ratio/low_mean": 0.0014502817575703375, "clip_ratio/low_min": 0.0002118278325724532, "clip_ratio/region_mean": 0.0026207323535345495, "epoch": 0.00979997550006125, "grad_norm": 0.1195027306675911, "learning_rate": 1e-06, "loss": 0.0467, "step": 105 }, { "clip_ratio/high_max": 0.003201271560101304, "clip_ratio/high_mean": 0.0014800217068113852, "clip_ratio/low_mean": 0.0015999262213881593, "clip_ratio/low_min": 0.0003387703791304375, "clip_ratio/region_mean": 0.0030799479718552902, "epoch": 0.009893308600061834, "grad_norm": 0.13096079230308533, "learning_rate": 1e-06, "loss": -0.0112, "step": 106 }, { "clip_ratio/high_max": 0.0030543005632353015, "clip_ratio/high_mean": 0.0012968190567335114, "clip_ratio/low_mean": 0.0015272485397872515, "clip_ratio/low_min": 0.0001728877341520274, "clip_ratio/region_mean": 0.002824067632900551, "epoch": 0.009986641700062417, "grad_norm": 0.1275666058063507, "learning_rate": 1e-06, "loss": 0.0272, "step": 107 }, { "clip_ratio/high_max": 0.0025191621753037907, "clip_ratio/high_mean": 0.0012134683529438917, "clip_ratio/low_mean": 0.0014848986320430413, "clip_ratio/low_min": 0.00025858148364932276, "clip_ratio/region_mean": 0.002698366981348954, "epoch": 0.010079974800063, "grad_norm": 0.11229578405618668, "learning_rate": 1e-06, "loss": 0.0519, "step": 108 }, { "clip_ratio/high_max": 0.003106487449258566, "clip_ratio/high_mean": 0.0014111388445599005, "clip_ratio/low_mean": 0.0014543828710884554, "clip_ratio/low_min": 0.00022016003640601411, "clip_ratio/region_mean": 0.0028655217320192605, "epoch": 0.010173307900063584, "grad_norm": 0.13031111657619476, "learning_rate": 1e-06, "loss": -0.0293, "step": 109 }, { "clip_ratio/high_max": 0.002698599433642812, "clip_ratio/high_mean": 0.0012986355268367333, "clip_ratio/low_mean": 0.0016266366365016438, "clip_ratio/low_min": 0.00011085279948019888, "clip_ratio/region_mean": 0.0029252721869852394, "epoch": 0.010266641000064166, "grad_norm": 0.12719281017780304, "learning_rate": 1e-06, "loss": 0.0417, "step": 110 }, { "clip_ratio/high_max": 0.0024644344302942045, "clip_ratio/high_mean": 0.0012884143980045337, "clip_ratio/low_mean": 0.0014313229476101696, "clip_ratio/low_min": 0.000225196647534176, "clip_ratio/region_mean": 0.0027197373201488517, "epoch": 0.010359974100064749, "grad_norm": 0.11651661992073059, "learning_rate": 1e-06, "loss": 0.009, "step": 111 }, { "clip_ratio/high_max": 0.002528496646846179, "clip_ratio/high_mean": 0.001220474463480059, "clip_ratio/low_mean": 0.0014707193258800544, "clip_ratio/low_min": 0.0003620400821091607, "clip_ratio/region_mean": 0.002691193869395647, "epoch": 0.010453307200065333, "grad_norm": 0.10828004777431488, "learning_rate": 1e-06, "loss": 0.0247, "step": 112 }, { "clip_ratio/high_max": 0.0033653701102593914, "clip_ratio/high_mean": 0.001461264935642248, "clip_ratio/low_mean": 0.00145121775494772, "clip_ratio/low_min": 0.00017661723177297972, "clip_ratio/region_mean": 0.002912482712417841, "epoch": 0.010546640300065916, "grad_norm": 0.12943993508815765, "learning_rate": 1e-06, "loss": -0.0168, "step": 113 }, { "clip_ratio/high_max": 0.002859338892449159, "clip_ratio/high_mean": 0.001295949798077345, "clip_ratio/low_mean": 0.0015416161986649968, "clip_ratio/low_min": 0.0002221035938418936, "clip_ratio/region_mean": 0.0028375659458106384, "epoch": 0.0106399734000665, "grad_norm": 0.11572157591581345, "learning_rate": 1e-06, "loss": 0.0296, "step": 114 }, { "clip_ratio/high_max": 0.002783154930511955, "clip_ratio/high_mean": 0.0013088373962091282, "clip_ratio/low_mean": 0.00147791879135184, "clip_ratio/low_min": 0.0002673045901246951, "clip_ratio/region_mean": 0.002786756187560968, "epoch": 0.010733306500067083, "grad_norm": 0.12409446388483047, "learning_rate": 1e-06, "loss": 0.0011, "step": 115 }, { "clip_ratio/high_max": 0.002771793559077196, "clip_ratio/high_mean": 0.0013845524408679921, "clip_ratio/low_mean": 0.001649686226301128, "clip_ratio/low_min": 0.0003981206173193641, "clip_ratio/region_mean": 0.0030342386598931625, "epoch": 0.010826639600067666, "grad_norm": 0.13282372057437897, "learning_rate": 1e-06, "loss": 0.0223, "step": 116 }, { "clip_ratio/high_max": 0.0022662589472020045, "clip_ratio/high_mean": 0.0012029814461129718, "clip_ratio/low_mean": 0.0013570538721978664, "clip_ratio/low_min": 0.000257093673099007, "clip_ratio/region_mean": 0.0025600353546906263, "epoch": 0.01091997270006825, "grad_norm": 0.11931352317333221, "learning_rate": 1e-06, "loss": 0.0028, "step": 117 }, { "clip_ratio/high_max": 0.003215425334929023, "clip_ratio/high_mean": 0.0013237579569249647, "clip_ratio/low_mean": 0.0016452279669465497, "clip_ratio/low_min": 0.0003323610362713225, "clip_ratio/region_mean": 0.002968985980260186, "epoch": 0.011013305800068833, "grad_norm": 0.13158798217773438, "learning_rate": 1e-06, "loss": 0.0152, "step": 118 }, { "clip_ratio/high_max": 0.002647452463861555, "clip_ratio/high_mean": 0.0012800487056665588, "clip_ratio/low_mean": 0.001443458786525298, "clip_ratio/low_min": 0.00033292964508291334, "clip_ratio/region_mean": 0.0027235074812779203, "epoch": 0.011106638900069417, "grad_norm": 0.11989909410476685, "learning_rate": 1e-06, "loss": 0.0483, "step": 119 }, { "clip_ratio/high_max": 0.0027026427269447595, "clip_ratio/high_mean": 0.0010280060960212722, "clip_ratio/low_mean": 0.0014192807830113452, "clip_ratio/low_min": 0.00029264237309689634, "clip_ratio/region_mean": 0.002447286926326342, "epoch": 0.01119997200007, "grad_norm": 0.11595442146062851, "learning_rate": 1e-06, "loss": 0.068, "step": 120 }, { "clip_ratio/high_max": 0.0025730949273565784, "clip_ratio/high_mean": 0.0011617753261816688, "clip_ratio/low_mean": 0.0015755972526676487, "clip_ratio/low_min": 0.00029739353885815945, "clip_ratio/region_mean": 0.002737372647970915, "epoch": 0.011293305100070584, "grad_norm": 0.11768896877765656, "learning_rate": 1e-06, "loss": 0.0569, "step": 121 }, { "clip_ratio/high_max": 0.0026708618825068697, "clip_ratio/high_mean": 0.0012442792249203194, "clip_ratio/low_mean": 0.0014611152982979547, "clip_ratio/low_min": 0.00034541680088295834, "clip_ratio/region_mean": 0.0027053944577346556, "epoch": 0.011386638200071167, "grad_norm": 0.11789402365684509, "learning_rate": 1e-06, "loss": 0.0193, "step": 122 }, { "clip_ratio/high_max": 0.0028895758150611073, "clip_ratio/high_mean": 0.0014895897838869132, "clip_ratio/low_mean": 0.0015124876772460993, "clip_ratio/low_min": 0.00019328471171320416, "clip_ratio/region_mean": 0.0030020774429431185, "epoch": 0.011479971300071749, "grad_norm": 0.12825018167495728, "learning_rate": 1e-06, "loss": -0.0431, "step": 123 }, { "clip_ratio/high_max": 0.0023921758111100644, "clip_ratio/high_mean": 0.001148466060840292, "clip_ratio/low_mean": 0.0014945909897505771, "clip_ratio/low_min": 0.00017136218139057746, "clip_ratio/region_mean": 0.002643057086970657, "epoch": 0.011573304400072332, "grad_norm": 0.12371445447206497, "learning_rate": 1e-06, "loss": 0.0536, "step": 124 }, { "clip_ratio/high_max": 0.0026537046942394227, "clip_ratio/high_mean": 0.001262595902517205, "clip_ratio/low_mean": 0.0016034173786465544, "clip_ratio/low_min": 0.0002627986832521856, "clip_ratio/region_mean": 0.0028660132811637595, "epoch": 0.011666637500072916, "grad_norm": 0.11962024867534637, "learning_rate": 1e-06, "loss": 0.0329, "step": 125 }, { "clip_ratio/high_max": 0.0028672960033873096, "clip_ratio/high_mean": 0.0012027163611492142, "clip_ratio/low_mean": 0.0014232714383979328, "clip_ratio/low_min": 0.0004464297744561918, "clip_ratio/region_mean": 0.0026259877486154437, "epoch": 0.0117599706000735, "grad_norm": 0.1233714148402214, "learning_rate": 1e-06, "loss": 0.0099, "step": 126 }, { "clip_ratio/high_max": 0.0030454081352218054, "clip_ratio/high_mean": 0.0013154065491107758, "clip_ratio/low_mean": 0.0013777863496216014, "clip_ratio/low_min": 0.00026856406748265726, "clip_ratio/region_mean": 0.002693192960578017, "epoch": 0.011853303700074083, "grad_norm": 0.11404651403427124, "learning_rate": 1e-06, "loss": -0.0047, "step": 127 }, { "clip_ratio/high_max": 0.003028228136827238, "clip_ratio/high_mean": 0.001399489050527336, "clip_ratio/low_mean": 0.0014854848777758889, "clip_ratio/low_min": 0.00018905064644059166, "clip_ratio/region_mean": 0.0028849739173892885, "epoch": 0.011946636800074666, "grad_norm": 0.12079603970050812, "learning_rate": 1e-06, "loss": 0.0171, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013593401227678603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 608.6326293945312, "completions/mean_terminated_length": 560.5741577148438, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.01203996990007525, "grad_norm": 0.13403193652629852, "learning_rate": 1e-06, "loss": 0.0436, "num_tokens": 161463676.0, "reward": 0.5264195203781128, "reward_std": 0.24368982017040253, "rewards/simpleverify_reward/mean": 0.5264195203781128, "rewards/simpleverify_reward/std": 0.49930375814437866, "step": 129 }, { "clip_ratio/high_max": 0.0024332596149179153, "clip_ratio/high_mean": 0.000988849033092265, "clip_ratio/low_mean": 0.0007325897458940744, "clip_ratio/low_min": 7.720795656496193e-05, "clip_ratio/region_mean": 0.0017214387553394772, "epoch": 0.012133303000075833, "grad_norm": 0.1320538967847824, "learning_rate": 1e-06, "loss": 0.0148, "step": 130 }, { "clip_ratio/high_max": 0.0021657638280885294, "clip_ratio/high_mean": 0.0008973378025984857, "clip_ratio/low_mean": 0.0007111463273759, "clip_ratio/low_min": 7.660079336346826e-05, "clip_ratio/region_mean": 0.0016084840826806612, "epoch": 0.012226636100076417, "grad_norm": 0.11757486313581467, "learning_rate": 1e-06, "loss": 0.0488, "step": 131 }, { "clip_ratio/high_max": 0.0022863169870106503, "clip_ratio/high_mean": 0.000983288615316269, "clip_ratio/low_mean": 0.0006977904340601526, "clip_ratio/low_min": 4.470737712836126e-05, "clip_ratio/region_mean": 0.0016810790621093474, "epoch": 0.012319969200077, "grad_norm": 0.13243581354618073, "learning_rate": 1e-06, "loss": 0.0306, "step": 132 }, { "clip_ratio/high_max": 0.0021634436197928153, "clip_ratio/high_mean": 0.0010413669515401125, "clip_ratio/low_mean": 0.0009701348044472979, "clip_ratio/low_min": 0.00018172072850575205, "clip_ratio/region_mean": 0.0020115017250645906, "epoch": 0.012413302300077584, "grad_norm": 0.13197748363018036, "learning_rate": 1e-06, "loss": 0.0428, "step": 133 }, { "clip_ratio/high_max": 0.002508608471543994, "clip_ratio/high_mean": 0.0010944333698716946, "clip_ratio/low_mean": 0.0010340974677092163, "clip_ratio/low_min": 0.00015072810447236407, "clip_ratio/region_mean": 0.002128530861227773, "epoch": 0.012506635400078167, "grad_norm": 0.12780120968818665, "learning_rate": 1e-06, "loss": -0.0193, "step": 134 }, { "clip_ratio/high_max": 0.0025976869801525027, "clip_ratio/high_mean": 0.0011295270887785591, "clip_ratio/low_mean": 0.0011326861276756972, "clip_ratio/low_min": 0.00012717880417767446, "clip_ratio/region_mean": 0.002262213201902341, "epoch": 0.01259996850007875, "grad_norm": 0.11698048561811447, "learning_rate": 1e-06, "loss": 0.0413, "step": 135 }, { "clip_ratio/high_max": 0.0025464736609137617, "clip_ratio/high_mean": 0.0011202241403225344, "clip_ratio/low_mean": 0.0010715174212236889, "clip_ratio/low_min": 0.00012168852572358446, "clip_ratio/region_mean": 0.002191741543356329, "epoch": 0.012693301600079332, "grad_norm": 0.12443061918020248, "learning_rate": 1e-06, "loss": 0.0103, "step": 136 }, { "clip_ratio/high_max": 0.002053405638434924, "clip_ratio/high_mean": 0.0009248903033949318, "clip_ratio/low_mean": 0.00100735711021116, "clip_ratio/low_min": 6.899488653289154e-05, "clip_ratio/region_mean": 0.0019322473381180316, "epoch": 0.012786634700079916, "grad_norm": 0.11504258960485458, "learning_rate": 1e-06, "loss": 0.0563, "step": 137 }, { "clip_ratio/high_max": 0.0025004914423334412, "clip_ratio/high_mean": 0.0011608715503825806, "clip_ratio/low_mean": 0.0009811282870941795, "clip_ratio/low_min": 0.00014834668490948388, "clip_ratio/region_mean": 0.0021419998156488873, "epoch": 0.0128799678000805, "grad_norm": 0.11799511313438416, "learning_rate": 1e-06, "loss": -0.0253, "step": 138 }, { "clip_ratio/high_max": 0.002182101146900095, "clip_ratio/high_mean": 0.0011294264459138503, "clip_ratio/low_mean": 0.0010483198529982474, "clip_ratio/low_min": 0.0001590806550666457, "clip_ratio/region_mean": 0.0021777462752652355, "epoch": 0.012973300900081083, "grad_norm": 0.10931219160556793, "learning_rate": 1e-06, "loss": -0.0026, "step": 139 }, { "clip_ratio/high_max": 0.0023643400927539915, "clip_ratio/high_mean": 0.001019347782857949, "clip_ratio/low_mean": 0.0010191293695243075, "clip_ratio/low_min": 0.00012544995706775808, "clip_ratio/region_mean": 0.002038477163296193, "epoch": 0.013066634000081666, "grad_norm": 0.12056972831487656, "learning_rate": 1e-06, "loss": 0.0039, "step": 140 }, { "clip_ratio/high_max": 0.002797265253320802, "clip_ratio/high_mean": 0.0011310938352835365, "clip_ratio/low_mean": 0.0012077284227416385, "clip_ratio/low_min": 0.00016638736451568548, "clip_ratio/region_mean": 0.002338822239835281, "epoch": 0.01315996710008225, "grad_norm": 0.12474213540554047, "learning_rate": 1e-06, "loss": 0.0228, "step": 141 }, { "clip_ratio/high_max": 0.0023029514122754335, "clip_ratio/high_mean": 0.0009922978970280383, "clip_ratio/low_mean": 0.0010987028290401213, "clip_ratio/low_min": 0.00017076500898838276, "clip_ratio/region_mean": 0.002091000773361884, "epoch": 0.013253300200082833, "grad_norm": 0.12109792232513428, "learning_rate": 1e-06, "loss": 0.0917, "step": 142 }, { "clip_ratio/high_max": 0.0023453186986444052, "clip_ratio/high_mean": 0.0010823465017892886, "clip_ratio/low_mean": 0.0012704171913355822, "clip_ratio/low_min": 0.00016696958391548833, "clip_ratio/region_mean": 0.002352763716771733, "epoch": 0.013346633300083417, "grad_norm": 0.12181831896305084, "learning_rate": 1e-06, "loss": 0.0506, "step": 143 }, { "clip_ratio/high_max": 0.002538202148571145, "clip_ratio/high_mean": 0.001040334809658816, "clip_ratio/low_mean": 0.0009443219387321733, "clip_ratio/low_min": 5.9650946695910534e-05, "clip_ratio/region_mean": 0.0019846567083732225, "epoch": 0.013439966400084, "grad_norm": 0.12078903615474701, "learning_rate": 1e-06, "loss": -0.004, "step": 144 }, { "clip_ratio/high_max": 0.002726344384427648, "clip_ratio/high_mean": 0.001087585336790653, "clip_ratio/low_mean": 0.0011553774274943862, "clip_ratio/low_min": 0.00021918675520282704, "clip_ratio/region_mean": 0.0022429627060773782, "epoch": 0.013533299500084583, "grad_norm": 0.1193271055817604, "learning_rate": 1e-06, "loss": 0.0332, "step": 145 }, { "clip_ratio/high_max": 0.002232237191492459, "clip_ratio/high_mean": 0.0010926271625066875, "clip_ratio/low_mean": 0.0010220541535090888, "clip_ratio/low_min": 0.0001043354222929338, "clip_ratio/region_mean": 0.0021146814033272676, "epoch": 0.013626632600085167, "grad_norm": 0.11858843266963959, "learning_rate": 1e-06, "loss": 0.0226, "step": 146 }, { "clip_ratio/high_max": 0.0022686543998133857, "clip_ratio/high_mean": 0.0010066488266602391, "clip_ratio/low_mean": 0.0011608630047703627, "clip_ratio/low_min": 0.00013585816213890212, "clip_ratio/region_mean": 0.0021675117968698032, "epoch": 0.01371996570008575, "grad_norm": 0.11356138437986374, "learning_rate": 1e-06, "loss": 0.0212, "step": 147 }, { "clip_ratio/high_max": 0.0021834527287865058, "clip_ratio/high_mean": 0.0011363685262040235, "clip_ratio/low_mean": 0.001056505801898311, "clip_ratio/low_min": 0.00016823627811390907, "clip_ratio/region_mean": 0.0021928743517491966, "epoch": 0.013813298800086334, "grad_norm": 0.1225375235080719, "learning_rate": 1e-06, "loss": 0.0023, "step": 148 }, { "clip_ratio/high_max": 0.0024487768532708287, "clip_ratio/high_mean": 0.0010493347981537227, "clip_ratio/low_mean": 0.00105377857471467, "clip_ratio/low_min": 0.00017336217479169136, "clip_ratio/region_mean": 0.0021031133801443502, "epoch": 0.013906631900086916, "grad_norm": 0.11625830084085464, "learning_rate": 1e-06, "loss": 0.0069, "step": 149 }, { "clip_ratio/high_max": 0.0023190810898086056, "clip_ratio/high_mean": 0.0009918206469592405, "clip_ratio/low_mean": 0.0012297029243200086, "clip_ratio/low_min": 0.00022102572802396026, "clip_ratio/region_mean": 0.002221523565822281, "epoch": 0.013999965000087499, "grad_norm": 0.1541500836610794, "learning_rate": 1e-06, "loss": 0.0353, "step": 150 }, { "clip_ratio/high_max": 0.002041557072516298, "clip_ratio/high_mean": 0.0009195821912726387, "clip_ratio/low_mean": 0.0011850562696054112, "clip_ratio/low_min": 7.665563680347987e-05, "clip_ratio/region_mean": 0.002104638442688156, "epoch": 0.014093298100088083, "grad_norm": 0.10795846581459045, "learning_rate": 1e-06, "loss": 0.0586, "step": 151 }, { "clip_ratio/high_max": 0.0023418266500812024, "clip_ratio/high_mean": 0.0010353720535931643, "clip_ratio/low_mean": 0.0012098116203560494, "clip_ratio/low_min": 0.00022125856412458234, "clip_ratio/region_mean": 0.0022451836994150653, "epoch": 0.014186631200088666, "grad_norm": 0.11836786568164825, "learning_rate": 1e-06, "loss": 0.0316, "step": 152 }, { "clip_ratio/high_max": 0.002334540316951461, "clip_ratio/high_mean": 0.001038872684148373, "clip_ratio/low_mean": 0.001198381680296734, "clip_ratio/low_min": 0.0002770094288280234, "clip_ratio/region_mean": 0.0022372543389792554, "epoch": 0.01427996430008925, "grad_norm": 0.1169411838054657, "learning_rate": 1e-06, "loss": 0.0653, "step": 153 }, { "clip_ratio/high_max": 0.0026761881235870533, "clip_ratio/high_mean": 0.0011457030232122634, "clip_ratio/low_mean": 0.0011620547084021382, "clip_ratio/low_min": 0.00014709090828546323, "clip_ratio/region_mean": 0.002307757742528338, "epoch": 0.014373297400089833, "grad_norm": 0.12875445187091827, "learning_rate": 1e-06, "loss": -0.0193, "step": 154 }, { "clip_ratio/high_max": 0.002504294730897527, "clip_ratio/high_mean": 0.0010995161756000016, "clip_ratio/low_mean": 0.0013625548344862182, "clip_ratio/low_min": 0.00011030960831703851, "clip_ratio/region_mean": 0.0024620710173621774, "epoch": 0.014466630500090416, "grad_norm": 0.13295584917068481, "learning_rate": 1e-06, "loss": 0.0388, "step": 155 }, { "clip_ratio/high_max": 0.002898297374485992, "clip_ratio/high_mean": 0.0012882258597528562, "clip_ratio/low_mean": 0.0009775020225788467, "clip_ratio/low_min": 9.865721949608997e-05, "clip_ratio/region_mean": 0.0022657278168480843, "epoch": 0.014559963600091, "grad_norm": 0.11341188102960587, "learning_rate": 1e-06, "loss": -0.0161, "step": 156 }, { "clip_ratio/high_max": 0.002397498770733364, "clip_ratio/high_mean": 0.0011049304812331684, "clip_ratio/low_mean": 0.0013211759433033876, "clip_ratio/low_min": 0.0003025866217285511, "clip_ratio/region_mean": 0.0024261063590529375, "epoch": 0.014653296700091583, "grad_norm": 0.12226711213588715, "learning_rate": 1e-06, "loss": 0.0367, "step": 157 }, { "clip_ratio/high_max": 0.0027256596731604077, "clip_ratio/high_mean": 0.0012023343224427663, "clip_ratio/low_mean": 0.0012399832849041559, "clip_ratio/low_min": 0.00010837760873982916, "clip_ratio/region_mean": 0.002442317629174795, "epoch": 0.014746629800092167, "grad_norm": 0.13183185458183289, "learning_rate": 1e-06, "loss": -0.0048, "step": 158 }, { "clip_ratio/high_max": 0.0027951238662353717, "clip_ratio/high_mean": 0.0013079852215014398, "clip_ratio/low_mean": 0.0011484756687423214, "clip_ratio/low_min": 7.220344923553057e-05, "clip_ratio/region_mean": 0.0024564609047956765, "epoch": 0.01483996290009275, "grad_norm": 0.1265721619129181, "learning_rate": 1e-06, "loss": -0.0289, "step": 159 }, { "clip_ratio/high_max": 0.002463600321789272, "clip_ratio/high_mean": 0.0010970926232403144, "clip_ratio/low_mean": 0.0012090505733794998, "clip_ratio/low_min": 0.00024864260376489256, "clip_ratio/region_mean": 0.0023061432148097083, "epoch": 0.014933296000093334, "grad_norm": 0.11900194734334946, "learning_rate": 1e-06, "loss": 0.0294, "step": 160 }, { "clip_ratio/high_max": 0.0030124678887659684, "clip_ratio/high_mean": 0.0012069379117747303, "clip_ratio/low_mean": 0.001343024010566296, "clip_ratio/low_min": 0.0002747145690591424, "clip_ratio/region_mean": 0.002549961893237196, "epoch": 0.015026629100093917, "grad_norm": 0.13085706532001495, "learning_rate": 1e-06, "loss": -0.0138, "step": 161 }, { "clip_ratio/high_max": 0.0029037394851911813, "clip_ratio/high_mean": 0.0012902680682600476, "clip_ratio/low_mean": 0.0014299489303084556, "clip_ratio/low_min": 0.0002330641882508644, "clip_ratio/region_mean": 0.0027202170094824396, "epoch": 0.015119962200094499, "grad_norm": 0.12124091386795044, "learning_rate": 1e-06, "loss": 0.0245, "step": 162 }, { "clip_ratio/high_max": 0.002570524586190004, "clip_ratio/high_mean": 0.0010408045891381335, "clip_ratio/low_mean": 0.0014011953135195654, "clip_ratio/low_min": 0.0001897479796753032, "clip_ratio/region_mean": 0.0024419998735538684, "epoch": 0.015213295300095082, "grad_norm": 0.11770489811897278, "learning_rate": 1e-06, "loss": 0.0537, "step": 163 }, { "clip_ratio/high_max": 0.002462083582940977, "clip_ratio/high_mean": 0.0012201415847812314, "clip_ratio/low_mean": 0.0013771850954071851, "clip_ratio/low_min": 0.0002169743620470399, "clip_ratio/region_mean": 0.002597326696559321, "epoch": 0.015306628400095666, "grad_norm": 0.12022513896226883, "learning_rate": 1e-06, "loss": 0.0005, "step": 164 }, { "clip_ratio/high_max": 0.0023391723589156754, "clip_ratio/high_mean": 0.0010765838596853428, "clip_ratio/low_mean": 0.0012353022757451981, "clip_ratio/low_min": 0.0002053519001492532, "clip_ratio/region_mean": 0.0023118862009141594, "epoch": 0.01539996150009625, "grad_norm": 0.12212057411670685, "learning_rate": 1e-06, "loss": 0.0259, "step": 165 }, { "clip_ratio/high_max": 0.002613412339997012, "clip_ratio/high_mean": 0.0011951558954024222, "clip_ratio/low_mean": 0.0012524801059043966, "clip_ratio/low_min": 9.570693509886041e-05, "clip_ratio/region_mean": 0.0024476360122207552, "epoch": 0.015493294600096833, "grad_norm": 0.11848278343677521, "learning_rate": 1e-06, "loss": -0.0015, "step": 166 }, { "clip_ratio/high_max": 0.0026641819858923554, "clip_ratio/high_mean": 0.0011898095290234778, "clip_ratio/low_mean": 0.0014335005325847305, "clip_ratio/low_min": 0.00018667688163986895, "clip_ratio/region_mean": 0.0026233100361423567, "epoch": 0.015586627700097416, "grad_norm": 0.11074207723140717, "learning_rate": 1e-06, "loss": 0.0358, "step": 167 }, { "clip_ratio/high_max": 0.002792465042148251, "clip_ratio/high_mean": 0.0012589432826644043, "clip_ratio/low_mean": 0.001214016450830968, "clip_ratio/low_min": 0.0002791898023133399, "clip_ratio/region_mean": 0.0024729597353143618, "epoch": 0.015679960800098, "grad_norm": 0.1204812079668045, "learning_rate": 1e-06, "loss": 0.0061, "step": 168 }, { "clip_ratio/high_max": 0.002655859261722071, "clip_ratio/high_mean": 0.0012477895252231974, "clip_ratio/low_mean": 0.0013207292249717284, "clip_ratio/low_min": 0.00018592403375805588, "clip_ratio/region_mean": 0.0025685187792987563, "epoch": 0.015773293900098583, "grad_norm": 0.127878338098526, "learning_rate": 1e-06, "loss": 0.0376, "step": 169 }, { "clip_ratio/high_max": 0.002435986214550212, "clip_ratio/high_mean": 0.0011270721333858091, "clip_ratio/low_mean": 0.001023003274895018, "clip_ratio/low_min": 8.427184548054356e-05, "clip_ratio/region_mean": 0.0021500754592125304, "epoch": 0.015866627000099165, "grad_norm": 0.11675351858139038, "learning_rate": 1e-06, "loss": -0.0311, "step": 170 }, { "clip_ratio/high_max": 0.0022785776636737864, "clip_ratio/high_mean": 0.001066446466211346, "clip_ratio/low_mean": 0.001355757503915811, "clip_ratio/low_min": 0.00021683182967535686, "clip_ratio/region_mean": 0.0024222039646701887, "epoch": 0.01595996010009975, "grad_norm": 0.12287482619285583, "learning_rate": 1e-06, "loss": 0.0524, "step": 171 }, { "clip_ratio/high_max": 0.002698982549190987, "clip_ratio/high_mean": 0.0011119126538687851, "clip_ratio/low_mean": 0.0011199601940461434, "clip_ratio/low_min": 6.396692151611205e-05, "clip_ratio/region_mean": 0.002231872880656738, "epoch": 0.016053293200100332, "grad_norm": 0.10930777341127396, "learning_rate": 1e-06, "loss": 0.0501, "step": 172 }, { "clip_ratio/high_max": 0.0029524349665734917, "clip_ratio/high_mean": 0.001257061452633934, "clip_ratio/low_mean": 0.0011075040310970508, "clip_ratio/low_min": 2.797046363411937e-05, "clip_ratio/region_mean": 0.002364565574680455, "epoch": 0.016146626300100917, "grad_norm": 0.12324044853448868, "learning_rate": 1e-06, "loss": -0.0223, "step": 173 }, { "clip_ratio/high_max": 0.0030870284972479567, "clip_ratio/high_mean": 0.0013111203588778153, "clip_ratio/low_mean": 0.0010724750227382174, "clip_ratio/low_min": 3.4246347240696196e-05, "clip_ratio/region_mean": 0.0023835953616071492, "epoch": 0.0162399594001015, "grad_norm": 0.11606115102767944, "learning_rate": 1e-06, "loss": -0.0054, "step": 174 }, { "clip_ratio/high_max": 0.002638303725689184, "clip_ratio/high_mean": 0.0011559507074707653, "clip_ratio/low_mean": 0.0013022475613979623, "clip_ratio/low_min": 0.00029158973120502196, "clip_ratio/region_mean": 0.002458198221575003, "epoch": 0.016333292500102084, "grad_norm": 0.1235213652253151, "learning_rate": 1e-06, "loss": 0.0264, "step": 175 }, { "clip_ratio/high_max": 0.0027016413878300227, "clip_ratio/high_mean": 0.0011622654237726238, "clip_ratio/low_mean": 0.0014873583386361133, "clip_ratio/low_min": 0.0002066812085104175, "clip_ratio/region_mean": 0.0026496236823732033, "epoch": 0.016426625600102666, "grad_norm": 0.1200484111905098, "learning_rate": 1e-06, "loss": 0.0522, "step": 176 }, { "clip_ratio/high_max": 0.002672854039701633, "clip_ratio/high_mean": 0.0012158513345639221, "clip_ratio/low_mean": 0.001363845191008295, "clip_ratio/low_min": 0.0003310314705231576, "clip_ratio/region_mean": 0.0025796965492190793, "epoch": 0.01651995870010325, "grad_norm": 0.11586089432239532, "learning_rate": 1e-06, "loss": 0.0199, "step": 177 }, { "clip_ratio/high_max": 0.0023066884532454424, "clip_ratio/high_mean": 0.001152584245573962, "clip_ratio/low_mean": 0.0015285496956494171, "clip_ratio/low_min": 0.00022844660452392418, "clip_ratio/region_mean": 0.002681133948499337, "epoch": 0.016613291800103833, "grad_norm": 0.11998248845338821, "learning_rate": 1e-06, "loss": 0.0494, "step": 178 }, { "clip_ratio/high_max": 0.0025488793035037816, "clip_ratio/high_mean": 0.001124043934396468, "clip_ratio/low_mean": 0.001438444851373788, "clip_ratio/low_min": 0.0002278391793879564, "clip_ratio/region_mean": 0.002562488843977917, "epoch": 0.016706624900104418, "grad_norm": 0.1215491071343422, "learning_rate": 1e-06, "loss": 0.0332, "step": 179 }, { "clip_ratio/high_max": 0.002647778470418416, "clip_ratio/high_mean": 0.00114215135545237, "clip_ratio/low_mean": 0.0013873987918486819, "clip_ratio/low_min": 0.00020759096332767513, "clip_ratio/region_mean": 0.0025295501691289246, "epoch": 0.016799958000105, "grad_norm": 0.13117018342018127, "learning_rate": 1e-06, "loss": 0.0383, "step": 180 }, { "clip_ratio/high_max": 0.0025569337012711912, "clip_ratio/high_mean": 0.0011582444039959228, "clip_ratio/low_mean": 0.0013539685933210421, "clip_ratio/low_min": 0.0002557817451815936, "clip_ratio/region_mean": 0.00251221295184223, "epoch": 0.016893291100105585, "grad_norm": 0.11314269155263901, "learning_rate": 1e-06, "loss": 0.0101, "step": 181 }, { "clip_ratio/high_max": 0.0026542178384261206, "clip_ratio/high_mean": 0.0011803568595496472, "clip_ratio/low_mean": 0.001240528654307127, "clip_ratio/low_min": 0.00016900484843063168, "clip_ratio/region_mean": 0.0024208855247707106, "epoch": 0.016986624200106167, "grad_norm": 0.11319568753242493, "learning_rate": 1e-06, "loss": -0.0093, "step": 182 }, { "clip_ratio/high_max": 0.0025909023970598355, "clip_ratio/high_mean": 0.0012374790785543155, "clip_ratio/low_mean": 0.001149815034295898, "clip_ratio/low_min": 9.820628747547744e-05, "clip_ratio/region_mean": 0.0023872940728324465, "epoch": 0.017079957300106748, "grad_norm": 0.1228477731347084, "learning_rate": 1e-06, "loss": -0.0159, "step": 183 }, { "clip_ratio/high_max": 0.0026282051803718787, "clip_ratio/high_mean": 0.001076097305485746, "clip_ratio/low_mean": 0.0013790780431008898, "clip_ratio/low_min": 0.00014604539319407195, "clip_ratio/region_mean": 0.0024551753886044025, "epoch": 0.017173290400107333, "grad_norm": 0.11773689836263657, "learning_rate": 1e-06, "loss": 0.0332, "step": 184 }, { "clip_ratio/high_max": 0.002487379388185218, "clip_ratio/high_mean": 0.0011027091277355794, "clip_ratio/low_mean": 0.0014333704857563134, "clip_ratio/low_min": 0.00023913930817798246, "clip_ratio/region_mean": 0.0025360796207678504, "epoch": 0.017266623500107915, "grad_norm": 0.12084262818098068, "learning_rate": 1e-06, "loss": 0.0375, "step": 185 }, { "clip_ratio/high_max": 0.0027148363151354715, "clip_ratio/high_mean": 0.0012640070017368998, "clip_ratio/low_mean": 0.0013348884640436154, "clip_ratio/low_min": 0.00010849974933080375, "clip_ratio/region_mean": 0.0025988954585045576, "epoch": 0.0173599566001085, "grad_norm": 0.12811660766601562, "learning_rate": 1e-06, "loss": 0.0047, "step": 186 }, { "clip_ratio/high_max": 0.0025311945064458996, "clip_ratio/high_mean": 0.0011070284162997268, "clip_ratio/low_mean": 0.0014850531370029785, "clip_ratio/low_min": 0.00022472956516139675, "clip_ratio/region_mean": 0.002592081575130578, "epoch": 0.017453289700109082, "grad_norm": 0.12231775373220444, "learning_rate": 1e-06, "loss": 0.0497, "step": 187 }, { "clip_ratio/high_max": 0.002489825477823615, "clip_ratio/high_mean": 0.001205796728754649, "clip_ratio/low_mean": 0.0014040306305105332, "clip_ratio/low_min": 0.0001636901706660865, "clip_ratio/region_mean": 0.002609827417472843, "epoch": 0.017546622800109667, "grad_norm": 0.13298480212688446, "learning_rate": 1e-06, "loss": 0.0322, "step": 188 }, { "clip_ratio/high_max": 0.0027122661122120917, "clip_ratio/high_mean": 0.0011768161966756452, "clip_ratio/low_mean": 0.001252476402441971, "clip_ratio/low_min": 0.000145522688399069, "clip_ratio/region_mean": 0.002429292602755595, "epoch": 0.01763995590011025, "grad_norm": 0.11797142028808594, "learning_rate": 1e-06, "loss": 0.0056, "step": 189 }, { "clip_ratio/high_max": 0.0025622150496928953, "clip_ratio/high_mean": 0.0011796395519922953, "clip_ratio/low_mean": 0.0011735809675883502, "clip_ratio/low_min": 9.31727090573986e-05, "clip_ratio/region_mean": 0.002353220494114794, "epoch": 0.017733289000110834, "grad_norm": 0.11661848425865173, "learning_rate": 1e-06, "loss": 0.0263, "step": 190 }, { "clip_ratio/high_max": 0.002745186437095981, "clip_ratio/high_mean": 0.0012337444786680862, "clip_ratio/low_mean": 0.0012545866557047702, "clip_ratio/low_min": 4.716317380371038e-05, "clip_ratio/region_mean": 0.0024883311125449836, "epoch": 0.017826622100111416, "grad_norm": 0.1126817911863327, "learning_rate": 1e-06, "loss": 0.0351, "step": 191 }, { "clip_ratio/high_max": 0.002563778281910345, "clip_ratio/high_mean": 0.0011736436263163341, "clip_ratio/low_mean": 0.001236382762726862, "clip_ratio/low_min": 0.00013949385265732417, "clip_ratio/region_mean": 0.0024100263763102703, "epoch": 0.017919955200112, "grad_norm": 0.10997709631919861, "learning_rate": 1e-06, "loss": -0.0091, "step": 192 }, { "clip_ratio/high_max": 0.0025424959167139605, "clip_ratio/high_mean": 0.0012055810184392612, "clip_ratio/low_mean": 0.0011300416990707163, "clip_ratio/low_min": 4.500285558606265e-05, "clip_ratio/region_mean": 0.0023356227538897656, "epoch": 0.018013288300112583, "grad_norm": 0.12096629291772842, "learning_rate": 1e-06, "loss": 0.0247, "step": 193 }, { "clip_ratio/high_max": 0.0026967603407683782, "clip_ratio/high_mean": 0.0012921491615998093, "clip_ratio/low_mean": 0.001306394240600639, "clip_ratio/low_min": 0.0003159637963108253, "clip_ratio/region_mean": 0.0025985434112953953, "epoch": 0.018106621400113165, "grad_norm": 0.11602344363927841, "learning_rate": 1e-06, "loss": -0.0158, "step": 194 }, { "clip_ratio/high_max": 0.002505861884856131, "clip_ratio/high_mean": 0.0011906351828656625, "clip_ratio/low_mean": 0.0013018415120313875, "clip_ratio/low_min": 0.00014446664863498881, "clip_ratio/region_mean": 0.0024924766912590712, "epoch": 0.01819995450011375, "grad_norm": 0.10961132496595383, "learning_rate": 1e-06, "loss": 0.021, "step": 195 }, { "clip_ratio/high_max": 0.0029728039080509916, "clip_ratio/high_mean": 0.0013984148354211356, "clip_ratio/low_mean": 0.001233438993949676, "clip_ratio/low_min": 0.00020975907318643294, "clip_ratio/region_mean": 0.0026318538730265573, "epoch": 0.01829328760011433, "grad_norm": 0.12933892011642456, "learning_rate": 1e-06, "loss": -0.0077, "step": 196 }, { "clip_ratio/high_max": 0.002315868485311512, "clip_ratio/high_mean": 0.0010439206307637505, "clip_ratio/low_mean": 0.0012818770665035117, "clip_ratio/low_min": 0.00022156216709845467, "clip_ratio/region_mean": 0.0023257977445609868, "epoch": 0.018386620700114917, "grad_norm": 0.11547909677028656, "learning_rate": 1e-06, "loss": 0.0783, "step": 197 }, { "clip_ratio/high_max": 0.0030729667996638454, "clip_ratio/high_mean": 0.0012934623337059747, "clip_ratio/low_mean": 0.0010308970995538402, "clip_ratio/low_min": 7.482250111934263e-05, "clip_ratio/region_mean": 0.002324359411431942, "epoch": 0.0184799538001155, "grad_norm": 0.10921128839254379, "learning_rate": 1e-06, "loss": -0.0103, "step": 198 }, { "clip_ratio/high_max": 0.0027546002602321096, "clip_ratio/high_mean": 0.0011375779504305683, "clip_ratio/low_mean": 0.0013485822637449019, "clip_ratio/low_min": 0.00010711663435358787, "clip_ratio/region_mean": 0.002486160199623555, "epoch": 0.018573286900116084, "grad_norm": 0.10429203510284424, "learning_rate": 1e-06, "loss": 0.039, "step": 199 }, { "clip_ratio/high_max": 0.0029651976146851666, "clip_ratio/high_mean": 0.001185284218081506, "clip_ratio/low_mean": 0.001405682025506394, "clip_ratio/low_min": 0.000203739423341176, "clip_ratio/region_mean": 0.0025909661926561967, "epoch": 0.018666620000116665, "grad_norm": 0.1098906546831131, "learning_rate": 1e-06, "loss": 0.0095, "step": 200 }, { "clip_ratio/high_max": 0.002753616361587774, "clip_ratio/high_mean": 0.0012288225007068831, "clip_ratio/low_mean": 0.0014717400299559813, "clip_ratio/low_min": 0.0002930818809545599, "clip_ratio/region_mean": 0.0027005625452147797, "epoch": 0.01875995310011725, "grad_norm": 0.11615803837776184, "learning_rate": 1e-06, "loss": 0.0524, "step": 201 }, { "clip_ratio/high_max": 0.002436910195683595, "clip_ratio/high_mean": 0.0012218611664138734, "clip_ratio/low_mean": 0.0013838372942700516, "clip_ratio/low_min": 0.0001856030121416552, "clip_ratio/region_mean": 0.00260569853708148, "epoch": 0.018853286200117832, "grad_norm": 0.11929242312908173, "learning_rate": 1e-06, "loss": 0.02, "step": 202 }, { "clip_ratio/high_max": 0.002949557761894539, "clip_ratio/high_mean": 0.0013158322180970572, "clip_ratio/low_mean": 0.0012549054190458264, "clip_ratio/low_min": 0.000141459025144286, "clip_ratio/region_mean": 0.0025707376626087353, "epoch": 0.018946619300118418, "grad_norm": 0.11886332929134369, "learning_rate": 1e-06, "loss": 0.0007, "step": 203 }, { "clip_ratio/high_max": 0.0025808335703914054, "clip_ratio/high_mean": 0.0011586524269660003, "clip_ratio/low_mean": 0.0013152622996130958, "clip_ratio/low_min": 0.0001259202563232975, "clip_ratio/region_mean": 0.0024739147775107995, "epoch": 0.019039952400119, "grad_norm": 0.1246223896741867, "learning_rate": 1e-06, "loss": 0.0322, "step": 204 }, { "clip_ratio/high_max": 0.0026471492237760685, "clip_ratio/high_mean": 0.0011050266730308067, "clip_ratio/low_mean": 0.0010999647311109584, "clip_ratio/low_min": 0.00021924682278040564, "clip_ratio/region_mean": 0.0022049914332455955, "epoch": 0.019133285500119585, "grad_norm": 0.10954298079013824, "learning_rate": 1e-06, "loss": 0.0226, "step": 205 }, { "clip_ratio/high_max": 0.002576317318016663, "clip_ratio/high_mean": 0.001156758709839778, "clip_ratio/low_mean": 0.0013909499139117543, "clip_ratio/low_min": 0.00019147875445924, "clip_ratio/region_mean": 0.0025477086019236594, "epoch": 0.019226618600120166, "grad_norm": 0.1190459206700325, "learning_rate": 1e-06, "loss": 0.0545, "step": 206 }, { "clip_ratio/high_max": 0.0027874516090378165, "clip_ratio/high_mean": 0.0012371846933092456, "clip_ratio/low_mean": 0.001243129518115893, "clip_ratio/low_min": 0.0002374109571974259, "clip_ratio/region_mean": 0.002480314185959287, "epoch": 0.019319951700120748, "grad_norm": 0.11833468079566956, "learning_rate": 1e-06, "loss": 0.0456, "step": 207 }, { "clip_ratio/high_max": 0.0027853806896018796, "clip_ratio/high_mean": 0.0012385505651764106, "clip_ratio/low_mean": 0.001268453099328326, "clip_ratio/low_min": 0.00011179314060427714, "clip_ratio/region_mean": 0.002507003620848991, "epoch": 0.019413284800121333, "grad_norm": 0.11821004748344421, "learning_rate": 1e-06, "loss": 0.0081, "step": 208 }, { "clip_ratio/high_max": 0.0024134113918989897, "clip_ratio/high_mean": 0.0010693396070564631, "clip_ratio/low_mean": 0.001365362993965391, "clip_ratio/low_min": 0.00020577540999511257, "clip_ratio/region_mean": 0.002434702590107918, "epoch": 0.019506617900121915, "grad_norm": 0.10630277544260025, "learning_rate": 1e-06, "loss": -0.0213, "step": 209 }, { "clip_ratio/high_max": 0.0024340629170183092, "clip_ratio/high_mean": 0.0011245457098993938, "clip_ratio/low_mean": 0.001490510501753306, "clip_ratio/low_min": 0.00010901557652687188, "clip_ratio/region_mean": 0.002615056175272912, "epoch": 0.0195999510001225, "grad_norm": 0.13615168631076813, "learning_rate": 1e-06, "loss": 0.0385, "step": 210 }, { "clip_ratio/high_max": 0.00241769700369332, "clip_ratio/high_mean": 0.0011127788384328596, "clip_ratio/low_mean": 0.001352664185105823, "clip_ratio/low_min": 0.00019827650248771533, "clip_ratio/region_mean": 0.002465442979882937, "epoch": 0.019693284100123082, "grad_norm": 0.12077221274375916, "learning_rate": 1e-06, "loss": 0.0024, "step": 211 }, { "clip_ratio/high_max": 0.002666712556674611, "clip_ratio/high_mean": 0.001316230835072929, "clip_ratio/low_mean": 0.0013837130391038954, "clip_ratio/low_min": 0.0001587056594871683, "clip_ratio/region_mean": 0.002699943899642676, "epoch": 0.019786617200123667, "grad_norm": 0.11692502349615097, "learning_rate": 1e-06, "loss": -0.0286, "step": 212 }, { "clip_ratio/high_max": 0.002690632412850391, "clip_ratio/high_mean": 0.0013632758236781228, "clip_ratio/low_mean": 0.0015113556801225059, "clip_ratio/low_min": 0.00016518930533493403, "clip_ratio/region_mean": 0.0028746315656462684, "epoch": 0.01987995030012425, "grad_norm": 0.11587584018707275, "learning_rate": 1e-06, "loss": -0.0292, "step": 213 }, { "clip_ratio/high_max": 0.0028760911300196312, "clip_ratio/high_mean": 0.0012737659344566055, "clip_ratio/low_mean": 0.001509167817857815, "clip_ratio/low_min": 0.0002808178414852591, "clip_ratio/region_mean": 0.0027829337122966535, "epoch": 0.019973283400124834, "grad_norm": 0.11286624521017075, "learning_rate": 1e-06, "loss": -0.0086, "step": 214 }, { "clip_ratio/high_max": 0.002930935617769137, "clip_ratio/high_mean": 0.0012926538583997171, "clip_ratio/low_mean": 0.0015894604512141086, "clip_ratio/low_min": 0.0002447009601382888, "clip_ratio/region_mean": 0.002882114305975847, "epoch": 0.020066616500125416, "grad_norm": 0.11991050839424133, "learning_rate": 1e-06, "loss": 0.0149, "step": 215 }, { "clip_ratio/high_max": 0.0027271677245153114, "clip_ratio/high_mean": 0.0012860386668762658, "clip_ratio/low_mean": 0.0014214065304258838, "clip_ratio/low_min": 9.606748517398955e-05, "clip_ratio/region_mean": 0.0027074452373199165, "epoch": 0.020159949600126, "grad_norm": 0.1182783767580986, "learning_rate": 1e-06, "loss": 0.0202, "step": 216 }, { "clip_ratio/high_max": 0.0024879729317035526, "clip_ratio/high_mean": 0.0011563311818463262, "clip_ratio/low_mean": 0.0013710670536966063, "clip_ratio/low_min": 0.0001953144837898435, "clip_ratio/region_mean": 0.0025273981591453776, "epoch": 0.020253282700126583, "grad_norm": 0.11814951151609421, "learning_rate": 1e-06, "loss": 0.043, "step": 217 }, { "clip_ratio/high_max": 0.002947408640466165, "clip_ratio/high_mean": 0.0013484851369867101, "clip_ratio/low_mean": 0.0012738424993585795, "clip_ratio/low_min": 0.00017427248803869588, "clip_ratio/region_mean": 0.0026223276217933744, "epoch": 0.020346615800127168, "grad_norm": 0.12284554541110992, "learning_rate": 1e-06, "loss": 0.0069, "step": 218 }, { "clip_ratio/high_max": 0.0027303431561449543, "clip_ratio/high_mean": 0.0012052442434651311, "clip_ratio/low_mean": 0.001110031284042634, "clip_ratio/low_min": 0.00011965695830440382, "clip_ratio/region_mean": 0.0023152755384217016, "epoch": 0.02043994890012775, "grad_norm": 0.1070648580789566, "learning_rate": 1e-06, "loss": 0.008, "step": 219 }, { "clip_ratio/high_max": 0.003096452579484321, "clip_ratio/high_mean": 0.0013992584827065002, "clip_ratio/low_mean": 0.0012040935798722785, "clip_ratio/low_min": 0.00016171355036931345, "clip_ratio/region_mean": 0.0026033521280623972, "epoch": 0.02053328200012833, "grad_norm": 0.1266152560710907, "learning_rate": 1e-06, "loss": 0.0043, "step": 220 }, { "clip_ratio/high_max": 0.0025792756059672683, "clip_ratio/high_mean": 0.0012948244439030532, "clip_ratio/low_mean": 0.0014399615320144221, "clip_ratio/low_min": 0.00016513191440026276, "clip_ratio/region_mean": 0.0027347859868314117, "epoch": 0.020626615100128916, "grad_norm": 0.12148232758045197, "learning_rate": 1e-06, "loss": 0.0083, "step": 221 }, { "clip_ratio/high_max": 0.0025228388549294323, "clip_ratio/high_mean": 0.0010800192994793179, "clip_ratio/low_mean": 0.001403281421517022, "clip_ratio/low_min": 8.814985812932719e-05, "clip_ratio/region_mean": 0.002483300704625435, "epoch": 0.020719948200129498, "grad_norm": 0.11848271638154984, "learning_rate": 1e-06, "loss": 0.0395, "step": 222 }, { "clip_ratio/high_max": 0.0028991815488552675, "clip_ratio/high_mean": 0.0012010799437121022, "clip_ratio/low_mean": 0.0013955115791759454, "clip_ratio/low_min": 0.00021726133763877442, "clip_ratio/region_mean": 0.002596591497422196, "epoch": 0.020813281300130083, "grad_norm": 0.11115968972444534, "learning_rate": 1e-06, "loss": 0.0468, "step": 223 }, { "clip_ratio/high_max": 0.0026336591399740428, "clip_ratio/high_mean": 0.001224983723659534, "clip_ratio/low_mean": 0.0014509059074043762, "clip_ratio/low_min": 0.00024209196999436244, "clip_ratio/region_mean": 0.0026758896274259314, "epoch": 0.020906614400130665, "grad_norm": 0.12176219373941422, "learning_rate": 1e-06, "loss": 0.0215, "step": 224 }, { "clip_ratio/high_max": 0.002578550651378464, "clip_ratio/high_mean": 0.0011918759628315456, "clip_ratio/low_mean": 0.0014368755764735397, "clip_ratio/low_min": 0.0002343893993383972, "clip_ratio/region_mean": 0.0026287515574949794, "epoch": 0.02099994750013125, "grad_norm": 0.11891946196556091, "learning_rate": 1e-06, "loss": 0.0114, "step": 225 }, { "clip_ratio/high_max": 0.0027156503419973888, "clip_ratio/high_mean": 0.0011570048882276751, "clip_ratio/low_mean": 0.0015513499492953997, "clip_ratio/low_min": 0.0001733959979901556, "clip_ratio/region_mean": 0.002708354892092757, "epoch": 0.021093280600131832, "grad_norm": 0.1076967865228653, "learning_rate": 1e-06, "loss": 0.0255, "step": 226 }, { "clip_ratio/high_max": 0.0027221765121794306, "clip_ratio/high_mean": 0.0012483207101467997, "clip_ratio/low_mean": 0.0014540683041559532, "clip_ratio/low_min": 0.00020745715028169798, "clip_ratio/region_mean": 0.00270238899247488, "epoch": 0.021186613700132417, "grad_norm": 0.1128716692328453, "learning_rate": 1e-06, "loss": -0.0167, "step": 227 }, { "clip_ratio/high_max": 0.002753770248091314, "clip_ratio/high_mean": 0.001242840640770737, "clip_ratio/low_mean": 0.0013551625634136144, "clip_ratio/low_min": 0.00023705119474470848, "clip_ratio/region_mean": 0.0025980032587540336, "epoch": 0.021279946800133, "grad_norm": 0.11067776381969452, "learning_rate": 1e-06, "loss": 0.0408, "step": 228 }, { "clip_ratio/high_max": 0.0028592185844900087, "clip_ratio/high_mean": 0.001326610377873294, "clip_ratio/low_mean": 0.001492405961471377, "clip_ratio/low_min": 0.0001630086717341328, "clip_ratio/region_mean": 0.0028190163720864803, "epoch": 0.021373279900133584, "grad_norm": 0.12158869206905365, "learning_rate": 1e-06, "loss": 0.0098, "step": 229 }, { "clip_ratio/high_max": 0.002937079145340249, "clip_ratio/high_mean": 0.0012737819306494202, "clip_ratio/low_mean": 0.0014161024591885507, "clip_ratio/low_min": 0.0002785625792967039, "clip_ratio/region_mean": 0.0026898844225797802, "epoch": 0.021466613000134166, "grad_norm": 0.12741751968860626, "learning_rate": 1e-06, "loss": 0.0316, "step": 230 }, { "clip_ratio/high_max": 0.0031803894307813607, "clip_ratio/high_mean": 0.0014429756774916314, "clip_ratio/low_mean": 0.0012438033445505425, "clip_ratio/low_min": 0.00024903894427552586, "clip_ratio/region_mean": 0.002686778978386428, "epoch": 0.02155994610013475, "grad_norm": 0.1174270361661911, "learning_rate": 1e-06, "loss": 0.0148, "step": 231 }, { "clip_ratio/high_max": 0.002933542364189634, "clip_ratio/high_mean": 0.0012495396276790416, "clip_ratio/low_mean": 0.0014253993394959252, "clip_ratio/low_min": 0.0002664927314981469, "clip_ratio/region_mean": 0.002674939052667469, "epoch": 0.021653279200135333, "grad_norm": 0.11752340197563171, "learning_rate": 1e-06, "loss": 0.0594, "step": 232 }, { "clip_ratio/high_max": 0.0029448304485413246, "clip_ratio/high_mean": 0.0013079808268230408, "clip_ratio/low_mean": 0.0013072635119897313, "clip_ratio/low_min": 0.00020943713843735168, "clip_ratio/region_mean": 0.002615244382468518, "epoch": 0.021746612300135915, "grad_norm": 0.11533144861459732, "learning_rate": 1e-06, "loss": 0.0221, "step": 233 }, { "clip_ratio/high_max": 0.002893707096518483, "clip_ratio/high_mean": 0.0012507598985393997, "clip_ratio/low_mean": 0.00145119241278735, "clip_ratio/low_min": 0.000206605081075395, "clip_ratio/region_mean": 0.0027019523113267496, "epoch": 0.0218399454001365, "grad_norm": 0.1245647668838501, "learning_rate": 1e-06, "loss": 0.0099, "step": 234 }, { "clip_ratio/high_max": 0.002663630344613921, "clip_ratio/high_mean": 0.0012397000027704053, "clip_ratio/low_mean": 0.0014314172694867011, "clip_ratio/low_min": 0.00011440727666922612, "clip_ratio/region_mean": 0.0026711172176874243, "epoch": 0.02193327850013708, "grad_norm": 0.11414322257041931, "learning_rate": 1e-06, "loss": 0.0126, "step": 235 }, { "clip_ratio/high_max": 0.0029554450447903946, "clip_ratio/high_mean": 0.001208468915137928, "clip_ratio/low_mean": 0.0013771112535323482, "clip_ratio/low_min": 0.0002789856989693362, "clip_ratio/region_mean": 0.002585580099548679, "epoch": 0.022026611600137667, "grad_norm": 0.11437011510133743, "learning_rate": 1e-06, "loss": -0.0145, "step": 236 }, { "clip_ratio/high_max": 0.0026809996415977366, "clip_ratio/high_mean": 0.0012158017743786331, "clip_ratio/low_mean": 0.0015558118466287851, "clip_ratio/low_min": 0.00024096857305266894, "clip_ratio/region_mean": 0.002771613624645397, "epoch": 0.02211994470013825, "grad_norm": 0.11856529116630554, "learning_rate": 1e-06, "loss": 0.0067, "step": 237 }, { "clip_ratio/high_max": 0.0026812001124199014, "clip_ratio/high_mean": 0.0010683003083613585, "clip_ratio/low_mean": 0.0016891086670511868, "clip_ratio/low_min": 0.00021632341304211877, "clip_ratio/region_mean": 0.0027574089472182095, "epoch": 0.022213277800138834, "grad_norm": 0.11973568797111511, "learning_rate": 1e-06, "loss": 0.0892, "step": 238 }, { "clip_ratio/high_max": 0.0029301593312993646, "clip_ratio/high_mean": 0.001317656358878594, "clip_ratio/low_mean": 0.0013663338759215549, "clip_ratio/low_min": 8.015544881345704e-05, "clip_ratio/region_mean": 0.0026839902566280216, "epoch": 0.022306610900139415, "grad_norm": 0.11149156093597412, "learning_rate": 1e-06, "loss": -0.0059, "step": 239 }, { "clip_ratio/high_max": 0.002814397055772133, "clip_ratio/high_mean": 0.0012506991661211941, "clip_ratio/low_mean": 0.001508244858996477, "clip_ratio/low_min": 0.00015515810628130566, "clip_ratio/region_mean": 0.0027589439632720314, "epoch": 0.02239994400014, "grad_norm": 0.12311536073684692, "learning_rate": 1e-06, "loss": 0.0207, "step": 240 }, { "clip_ratio/high_max": 0.003241408929170575, "clip_ratio/high_mean": 0.0013674206020368729, "clip_ratio/low_mean": 0.0011476982654130552, "clip_ratio/low_min": 0.00011804434052464785, "clip_ratio/region_mean": 0.002515118867449928, "epoch": 0.022493277100140582, "grad_norm": 0.12148340046405792, "learning_rate": 1e-06, "loss": -0.0134, "step": 241 }, { "clip_ratio/high_max": 0.0028867476430605166, "clip_ratio/high_mean": 0.0012265812802070286, "clip_ratio/low_mean": 0.0014747263558092527, "clip_ratio/low_min": 0.00014939865286578424, "clip_ratio/region_mean": 0.0027013076469302177, "epoch": 0.022586610200141168, "grad_norm": 0.11273916065692902, "learning_rate": 1e-06, "loss": 0.0243, "step": 242 }, { "clip_ratio/high_max": 0.002602900640340522, "clip_ratio/high_mean": 0.0012631772260647267, "clip_ratio/low_mean": 0.0013574150907516014, "clip_ratio/low_min": 0.00017087049309338909, "clip_ratio/region_mean": 0.002620592335006222, "epoch": 0.02267994330014175, "grad_norm": 0.11515071243047714, "learning_rate": 1e-06, "loss": 0.0078, "step": 243 }, { "clip_ratio/high_max": 0.0030365166894625872, "clip_ratio/high_mean": 0.0013074428352410905, "clip_ratio/low_mean": 0.0013274852317408659, "clip_ratio/low_min": 0.00014806052149651805, "clip_ratio/region_mean": 0.002634928052430041, "epoch": 0.022773276400142334, "grad_norm": 0.11722594499588013, "learning_rate": 1e-06, "loss": 0.0074, "step": 244 }, { "clip_ratio/high_max": 0.002536213396524545, "clip_ratio/high_mean": 0.0011760884008253925, "clip_ratio/low_mean": 0.0015724250297353137, "clip_ratio/low_min": 0.00025744376307557104, "clip_ratio/region_mean": 0.0027485134196467698, "epoch": 0.022866609500142916, "grad_norm": 0.1268276870250702, "learning_rate": 1e-06, "loss": 0.083, "step": 245 }, { "clip_ratio/high_max": 0.002958224016765598, "clip_ratio/high_mean": 0.001265724618860986, "clip_ratio/low_mean": 0.0014083201058383565, "clip_ratio/low_min": 0.00018465370885678567, "clip_ratio/region_mean": 0.0026740447647171095, "epoch": 0.022959942600143498, "grad_norm": 0.12519189715385437, "learning_rate": 1e-06, "loss": 0.0354, "step": 246 }, { "clip_ratio/high_max": 0.002893728429626208, "clip_ratio/high_mean": 0.0013277997677505482, "clip_ratio/low_mean": 0.0014527560051647015, "clip_ratio/low_min": 0.00021506683242478175, "clip_ratio/region_mean": 0.0027805557183455676, "epoch": 0.023053275700144083, "grad_norm": 0.1117105707526207, "learning_rate": 1e-06, "loss": 0.0108, "step": 247 }, { "clip_ratio/high_max": 0.002741170108492952, "clip_ratio/high_mean": 0.0013270340459712315, "clip_ratio/low_mean": 0.0013807948562316597, "clip_ratio/low_min": 0.00013223127461969852, "clip_ratio/region_mean": 0.0027078288912889548, "epoch": 0.023146608800144665, "grad_norm": 0.1205824613571167, "learning_rate": 1e-06, "loss": -0.0088, "step": 248 }, { "clip_ratio/high_max": 0.0032962685509119183, "clip_ratio/high_mean": 0.0013764514042122755, "clip_ratio/low_mean": 0.0015203620096144732, "clip_ratio/low_min": 0.00027015406158170663, "clip_ratio/region_mean": 0.0028968133847229183, "epoch": 0.02323994190014525, "grad_norm": 0.12086623162031174, "learning_rate": 1e-06, "loss": 0.0223, "step": 249 }, { "clip_ratio/high_max": 0.002714697089686524, "clip_ratio/high_mean": 0.0011880185520567466, "clip_ratio/low_mean": 0.0014526119703077711, "clip_ratio/low_min": 0.0002161014035664266, "clip_ratio/region_mean": 0.002640630496898666, "epoch": 0.023333275000145832, "grad_norm": 0.10611660033464432, "learning_rate": 1e-06, "loss": 0.0299, "step": 250 }, { "clip_ratio/high_max": 0.0026539059908827767, "clip_ratio/high_mean": 0.0012327325639489572, "clip_ratio/low_mean": 0.0014886019998812117, "clip_ratio/low_min": 0.00017664152255747467, "clip_ratio/region_mean": 0.0027213345747441053, "epoch": 0.023426608100146417, "grad_norm": 0.10782354325056076, "learning_rate": 1e-06, "loss": 0.0537, "step": 251 }, { "clip_ratio/high_max": 0.0031486807274632156, "clip_ratio/high_mean": 0.001254881797649432, "clip_ratio/low_mean": 0.0014445605629589409, "clip_ratio/low_min": 0.00010275974727846915, "clip_ratio/region_mean": 0.0026994424042641185, "epoch": 0.023519941200147, "grad_norm": 0.1270337551832199, "learning_rate": 1e-06, "loss": -0.0053, "step": 252 }, { "clip_ratio/high_max": 0.002889482297177892, "clip_ratio/high_mean": 0.0012911734738736413, "clip_ratio/low_mean": 0.0015569323622912634, "clip_ratio/low_min": 0.00020511935053946218, "clip_ratio/region_mean": 0.0028481058252509683, "epoch": 0.023613274300147584, "grad_norm": 0.11966459453105927, "learning_rate": 1e-06, "loss": 0.0178, "step": 253 }, { "clip_ratio/high_max": 0.002719169919146225, "clip_ratio/high_mean": 0.0013231332013674546, "clip_ratio/low_mean": 0.0017619675782043487, "clip_ratio/low_min": 0.00038492104522447335, "clip_ratio/region_mean": 0.0030851007904857397, "epoch": 0.023706607400148166, "grad_norm": 0.13348238170146942, "learning_rate": 1e-06, "loss": 0.0227, "step": 254 }, { "clip_ratio/high_max": 0.0027464530139695853, "clip_ratio/high_mean": 0.0012679077153734397, "clip_ratio/low_mean": 0.0014296650879259687, "clip_ratio/low_min": 0.00025208152965205954, "clip_ratio/region_mean": 0.0026975728251272812, "epoch": 0.02379994050014875, "grad_norm": 0.11844424158334732, "learning_rate": 1e-06, "loss": 0.0321, "step": 255 }, { "clip_ratio/high_max": 0.002598723782284651, "clip_ratio/high_mean": 0.001076618198567303, "clip_ratio/low_mean": 0.0015559640887659043, "clip_ratio/low_min": 0.00040255888870888157, "clip_ratio/region_mean": 0.0026325823055231012, "epoch": 0.023893273600149333, "grad_norm": 0.1172008290886879, "learning_rate": 1e-06, "loss": 0.0971, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014020647321428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 606.3728637695312, "completions/mean_terminated_length": 556.7503051757812, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.023986606700149918, "grad_norm": 0.13878346979618073, "learning_rate": 1e-06, "loss": 0.0479, "num_tokens": 242343459.0, "reward": 0.5473458766937256, "reward_std": 0.22819411754608154, "rewards/simpleverify_reward/mean": 0.5473458170890808, "rewards/simpleverify_reward/std": 0.4977554976940155, "step": 257 }, { "clip_ratio/high_max": 0.002318706101505086, "clip_ratio/high_mean": 0.0009321549168817, "clip_ratio/low_mean": 0.0006447340729209827, "clip_ratio/low_min": 5.173052704776637e-05, "clip_ratio/region_mean": 0.0015768890079925768, "epoch": 0.0240799398001505, "grad_norm": 0.13692863285541534, "learning_rate": 1e-06, "loss": 0.0519, "step": 258 }, { "clip_ratio/high_max": 0.0022346855184878223, "clip_ratio/high_mean": 0.001037432604789501, "clip_ratio/low_mean": 0.0005807069028378464, "clip_ratio/low_min": 3.277349878771929e-05, "clip_ratio/region_mean": 0.0016181395476451144, "epoch": 0.02417327290015108, "grad_norm": 0.1287120282649994, "learning_rate": 1e-06, "loss": -0.0069, "step": 259 }, { "clip_ratio/high_max": 0.0019780331131187268, "clip_ratio/high_mean": 0.0007728182972641662, "clip_ratio/low_mean": 0.0006453432533817249, "clip_ratio/low_min": 3.711226236191578e-05, "clip_ratio/region_mean": 0.001418161547917407, "epoch": 0.024266606000151666, "grad_norm": 0.1013762354850769, "learning_rate": 1e-06, "loss": 0.0459, "step": 260 }, { "clip_ratio/high_max": 0.002177521473640809, "clip_ratio/high_mean": 0.000954049093706999, "clip_ratio/low_mean": 0.0006498518005173537, "clip_ratio/low_min": 5.5541762776556425e-05, "clip_ratio/region_mean": 0.0016039009096857626, "epoch": 0.024359939100152248, "grad_norm": 0.11394771188497543, "learning_rate": 1e-06, "loss": -0.032, "step": 261 }, { "clip_ratio/high_max": 0.002149220817955211, "clip_ratio/high_mean": 0.0009187996620312333, "clip_ratio/low_mean": 0.0008728065549803432, "clip_ratio/low_min": 0.00010882023434533039, "clip_ratio/region_mean": 0.001791606227925513, "epoch": 0.024453272200152833, "grad_norm": 0.12061510980129242, "learning_rate": 1e-06, "loss": 0.046, "step": 262 }, { "clip_ratio/high_max": 0.00240359095914755, "clip_ratio/high_mean": 0.0010290763493685517, "clip_ratio/low_mean": 0.0010250703071505995, "clip_ratio/low_min": 0.000153132894411101, "clip_ratio/region_mean": 0.0020541466001304798, "epoch": 0.024546605300153415, "grad_norm": 0.11530864983797073, "learning_rate": 1e-06, "loss": 0.0318, "step": 263 }, { "clip_ratio/high_max": 0.002490508930350188, "clip_ratio/high_mean": 0.0011761773894249927, "clip_ratio/low_mean": 0.0009135854343185201, "clip_ratio/low_min": 0.00011792420264100656, "clip_ratio/region_mean": 0.002089762856485322, "epoch": 0.024639938400154, "grad_norm": 0.12418489903211594, "learning_rate": 1e-06, "loss": 0.0057, "step": 264 }, { "clip_ratio/high_max": 0.0025673400232335553, "clip_ratio/high_mean": 0.001129146739913267, "clip_ratio/low_mean": 0.001020339612296084, "clip_ratio/low_min": 0.0001122549165302189, "clip_ratio/region_mean": 0.0021494863103725947, "epoch": 0.024733271500154582, "grad_norm": 0.12576733529567719, "learning_rate": 1e-06, "loss": 0.0559, "step": 265 }, { "clip_ratio/high_max": 0.002444140314764809, "clip_ratio/high_mean": 0.0010282442162861116, "clip_ratio/low_mean": 0.0010951087588182418, "clip_ratio/low_min": 0.00013668149222212378, "clip_ratio/region_mean": 0.0021233530205790885, "epoch": 0.024826604600155167, "grad_norm": 0.12605029344558716, "learning_rate": 1e-06, "loss": 0.0561, "step": 266 }, { "clip_ratio/high_max": 0.002505919932445977, "clip_ratio/high_mean": 0.0010706119082897203, "clip_ratio/low_mean": 0.001019883970002411, "clip_ratio/low_min": 8.928609531722032e-05, "clip_ratio/region_mean": 0.002090495872835163, "epoch": 0.02491993770015575, "grad_norm": 0.11480946093797684, "learning_rate": 1e-06, "loss": 0.0429, "step": 267 }, { "clip_ratio/high_max": 0.002146953935152851, "clip_ratio/high_mean": 0.0009213567136612255, "clip_ratio/low_mean": 0.0008541341776435729, "clip_ratio/low_min": 0.00012193677412142279, "clip_ratio/region_mean": 0.001775490898580756, "epoch": 0.025013270800156334, "grad_norm": 0.11736516654491425, "learning_rate": 1e-06, "loss": 0.0347, "step": 268 }, { "clip_ratio/high_max": 0.002444287318212446, "clip_ratio/high_mean": 0.0009080634717975045, "clip_ratio/low_mean": 0.0008916878086893121, "clip_ratio/low_min": 8.94487575351377e-05, "clip_ratio/region_mean": 0.0017997512404690497, "epoch": 0.025106603900156916, "grad_norm": 0.10688665509223938, "learning_rate": 1e-06, "loss": 0.05, "step": 269 }, { "clip_ratio/high_max": 0.0022405747331504244, "clip_ratio/high_mean": 0.0009181412024190649, "clip_ratio/low_mean": 0.0008927080343710259, "clip_ratio/low_min": 0.00020293468332965858, "clip_ratio/region_mean": 0.0018108492731698789, "epoch": 0.0251999370001575, "grad_norm": 0.11290182173252106, "learning_rate": 1e-06, "loss": 0.0481, "step": 270 }, { "clip_ratio/high_max": 0.0024090096485451795, "clip_ratio/high_mean": 0.0011666065765894018, "clip_ratio/low_mean": 0.0008310027260449715, "clip_ratio/low_min": 2.82986902675475e-05, "clip_ratio/region_mean": 0.001997609288082458, "epoch": 0.025293270100158083, "grad_norm": 0.12206444889307022, "learning_rate": 1e-06, "loss": 0.0006, "step": 271 }, { "clip_ratio/high_max": 0.0019530435238266364, "clip_ratio/high_mean": 0.0009773091514944099, "clip_ratio/low_mean": 0.000938744269660674, "clip_ratio/low_min": 9.81302964646602e-05, "clip_ratio/region_mean": 0.001916053399327211, "epoch": 0.025386603200158665, "grad_norm": 0.12658779323101044, "learning_rate": 1e-06, "loss": 0.042, "step": 272 }, { "clip_ratio/high_max": 0.0025241987968911417, "clip_ratio/high_mean": 0.0010535933797655161, "clip_ratio/low_mean": 0.001068296391167678, "clip_ratio/low_min": 0.00014435025423154002, "clip_ratio/region_mean": 0.0021218897600192577, "epoch": 0.02547993630015925, "grad_norm": 0.11415829509496689, "learning_rate": 1e-06, "loss": 0.0148, "step": 273 }, { "clip_ratio/high_max": 0.002588375165942125, "clip_ratio/high_mean": 0.0011596155563893262, "clip_ratio/low_mean": 0.0008853486688167322, "clip_ratio/low_min": 9.122736810240895e-05, "clip_ratio/region_mean": 0.0020449642106541432, "epoch": 0.02557326940015983, "grad_norm": 0.11924593895673752, "learning_rate": 1e-06, "loss": -0.0118, "step": 274 }, { "clip_ratio/high_max": 0.0022943424264667556, "clip_ratio/high_mean": 0.0009754979764693417, "clip_ratio/low_mean": 0.0011792521363531705, "clip_ratio/low_min": 0.00024328667132067494, "clip_ratio/region_mean": 0.0021547501455643214, "epoch": 0.025666602500160417, "grad_norm": 0.12239724397659302, "learning_rate": 1e-06, "loss": 0.0906, "step": 275 }, { "clip_ratio/high_max": 0.0024246125904028304, "clip_ratio/high_mean": 0.001187376692541875, "clip_ratio/low_mean": 0.0010666898979252437, "clip_ratio/low_min": 0.00015142666052270215, "clip_ratio/region_mean": 0.0022540666541317478, "epoch": 0.025759935600161, "grad_norm": 0.1227109357714653, "learning_rate": 1e-06, "loss": -0.0087, "step": 276 }, { "clip_ratio/high_max": 0.0025024011702043936, "clip_ratio/high_mean": 0.0011753052822314203, "clip_ratio/low_mean": 0.0010576730783213861, "clip_ratio/low_min": 9.673169734014664e-05, "clip_ratio/region_mean": 0.002232978353276849, "epoch": 0.025853268700161584, "grad_norm": 0.11423332244157791, "learning_rate": 1e-06, "loss": 0.0091, "step": 277 }, { "clip_ratio/high_max": 0.002409491644357331, "clip_ratio/high_mean": 0.0011441790647950256, "clip_ratio/low_mean": 0.001143206340202596, "clip_ratio/low_min": 0.00014949106298445258, "clip_ratio/region_mean": 0.0022873853231430985, "epoch": 0.025946601800162165, "grad_norm": 0.11554890125989914, "learning_rate": 1e-06, "loss": -0.0285, "step": 278 }, { "clip_ratio/high_max": 0.0025720396006363444, "clip_ratio/high_mean": 0.0011215429731237236, "clip_ratio/low_mean": 0.0011445862292021047, "clip_ratio/low_min": 9.396823588758707e-05, "clip_ratio/region_mean": 0.0022661291441181675, "epoch": 0.02603993490016275, "grad_norm": 0.11064556986093521, "learning_rate": 1e-06, "loss": -0.0037, "step": 279 }, { "clip_ratio/high_max": 0.002317258797120303, "clip_ratio/high_mean": 0.0010377992839494254, "clip_ratio/low_mean": 0.0011915607374248793, "clip_ratio/low_min": 0.00012314695413806476, "clip_ratio/region_mean": 0.002229359990451485, "epoch": 0.026133268000163332, "grad_norm": 0.12614858150482178, "learning_rate": 1e-06, "loss": 0.0042, "step": 280 }, { "clip_ratio/high_max": 0.0024208695322158746, "clip_ratio/high_mean": 0.0009814514733079704, "clip_ratio/low_mean": 0.0012528287588793319, "clip_ratio/low_min": 0.00010306581862096209, "clip_ratio/region_mean": 0.002234280233096797, "epoch": 0.026226601100163918, "grad_norm": 0.1160554587841034, "learning_rate": 1e-06, "loss": 0.0203, "step": 281 }, { "clip_ratio/high_max": 0.0023500206334574614, "clip_ratio/high_mean": 0.0010458977503731148, "clip_ratio/low_mean": 0.0011094500969193177, "clip_ratio/low_min": 8.827497367747128e-05, "clip_ratio/region_mean": 0.0021553478873101994, "epoch": 0.0263199342001645, "grad_norm": 0.11173048615455627, "learning_rate": 1e-06, "loss": 0.0211, "step": 282 }, { "clip_ratio/high_max": 0.0022284902370302007, "clip_ratio/high_mean": 0.0010493646113900468, "clip_ratio/low_mean": 0.0011318081051285844, "clip_ratio/low_min": 0.00012473552123992704, "clip_ratio/region_mean": 0.002181172756536398, "epoch": 0.026413267300165084, "grad_norm": 0.12013093382120132, "learning_rate": 1e-06, "loss": 0.0261, "step": 283 }, { "clip_ratio/high_max": 0.0027971367380814627, "clip_ratio/high_mean": 0.001220919877596316, "clip_ratio/low_mean": 0.0011303797909931745, "clip_ratio/low_min": 0.00014531417218677234, "clip_ratio/region_mean": 0.0023512996558565646, "epoch": 0.026506600400165666, "grad_norm": 0.12292158603668213, "learning_rate": 1e-06, "loss": -0.011, "step": 284 }, { "clip_ratio/high_max": 0.0023757604940328747, "clip_ratio/high_mean": 0.0009779406964298687, "clip_ratio/low_mean": 0.001226168911671266, "clip_ratio/low_min": 0.00017896837744046934, "clip_ratio/region_mean": 0.0022041096235625446, "epoch": 0.026599933500166248, "grad_norm": 0.11741437762975693, "learning_rate": 1e-06, "loss": 0.0373, "step": 285 }, { "clip_ratio/high_max": 0.0026118098103324883, "clip_ratio/high_mean": 0.0011304804720566608, "clip_ratio/low_mean": 0.0011518621704453835, "clip_ratio/low_min": 7.186270431702724e-05, "clip_ratio/region_mean": 0.002282342677062843, "epoch": 0.026693266600166833, "grad_norm": 0.12293212860822678, "learning_rate": 1e-06, "loss": -0.0149, "step": 286 }, { "clip_ratio/high_max": 0.002940037113148719, "clip_ratio/high_mean": 0.0011611158224695828, "clip_ratio/low_mean": 0.0012260290022823028, "clip_ratio/low_min": 0.00024637759725010255, "clip_ratio/region_mean": 0.002387144777458161, "epoch": 0.026786599700167415, "grad_norm": 0.1197468563914299, "learning_rate": 1e-06, "loss": 0.021, "step": 287 }, { "clip_ratio/high_max": 0.0025703799474285915, "clip_ratio/high_mean": 0.0011514400066516828, "clip_ratio/low_mean": 0.0012723642939818092, "clip_ratio/low_min": 0.00012272383537492715, "clip_ratio/region_mean": 0.0024238042751676403, "epoch": 0.026879932800168, "grad_norm": 0.1248224601149559, "learning_rate": 1e-06, "loss": -0.0219, "step": 288 }, { "clip_ratio/high_max": 0.002599159473902546, "clip_ratio/high_mean": 0.0011211236342205666, "clip_ratio/low_mean": 0.0012730159505736083, "clip_ratio/low_min": 9.851210597844329e-05, "clip_ratio/region_mean": 0.002394139548414387, "epoch": 0.026973265900168582, "grad_norm": 0.12003982067108154, "learning_rate": 1e-06, "loss": 0.0327, "step": 289 }, { "clip_ratio/high_max": 0.002249806304462254, "clip_ratio/high_mean": 0.0009766307084646542, "clip_ratio/low_mean": 0.0013646035422425484, "clip_ratio/low_min": 0.00021926810950390063, "clip_ratio/region_mean": 0.002341234234336298, "epoch": 0.027066599000169167, "grad_norm": 0.11406214535236359, "learning_rate": 1e-06, "loss": 0.0468, "step": 290 }, { "clip_ratio/high_max": 0.0027012326536350884, "clip_ratio/high_mean": 0.0011246063477301504, "clip_ratio/low_mean": 0.0013270542185637169, "clip_ratio/low_min": 0.00013454911277221981, "clip_ratio/region_mean": 0.002451660533552058, "epoch": 0.02715993210016975, "grad_norm": 0.12362117320299149, "learning_rate": 1e-06, "loss": 0.0216, "step": 291 }, { "clip_ratio/high_max": 0.0024649514198245015, "clip_ratio/high_mean": 0.0010324402755941264, "clip_ratio/low_mean": 0.0014683713015983813, "clip_ratio/low_min": 0.00016491775295435218, "clip_ratio/region_mean": 0.002500811511708889, "epoch": 0.027253265200170334, "grad_norm": 0.12036436051130295, "learning_rate": 1e-06, "loss": 0.0537, "step": 292 }, { "clip_ratio/high_max": 0.0023901978456706274, "clip_ratio/high_mean": 0.0010643073719620588, "clip_ratio/low_mean": 0.001386093190376414, "clip_ratio/low_min": 0.00015086051098478492, "clip_ratio/region_mean": 0.0024504005268681794, "epoch": 0.027346598300170916, "grad_norm": 0.12816454470157623, "learning_rate": 1e-06, "loss": 0.0315, "step": 293 }, { "clip_ratio/high_max": 0.0026513835109653883, "clip_ratio/high_mean": 0.0011005796204699436, "clip_ratio/low_mean": 0.0012435173666744959, "clip_ratio/low_min": 0.00012286496348679066, "clip_ratio/region_mean": 0.0023440970107913017, "epoch": 0.0274399314001715, "grad_norm": 0.11072500050067902, "learning_rate": 1e-06, "loss": 0.0417, "step": 294 }, { "clip_ratio/high_max": 0.0023924853521748446, "clip_ratio/high_mean": 0.0010282583698426606, "clip_ratio/low_mean": 0.0012225905193190556, "clip_ratio/low_min": 0.00016921822407311993, "clip_ratio/region_mean": 0.0022508489273604937, "epoch": 0.027533264500172083, "grad_norm": 0.12008463591337204, "learning_rate": 1e-06, "loss": 0.0546, "step": 295 }, { "clip_ratio/high_max": 0.0024777651924523525, "clip_ratio/high_mean": 0.0010775557930173818, "clip_ratio/low_mean": 0.001231388043379411, "clip_ratio/low_min": 0.00018410832126392052, "clip_ratio/region_mean": 0.0023089438400347717, "epoch": 0.027626597600172668, "grad_norm": 0.11875315755605698, "learning_rate": 1e-06, "loss": 0.0054, "step": 296 }, { "clip_ratio/high_max": 0.0022739601699868217, "clip_ratio/high_mean": 0.0010673532051441725, "clip_ratio/low_mean": 0.0011142037074023392, "clip_ratio/low_min": 0.00020274594407965196, "clip_ratio/region_mean": 0.0021815569125465117, "epoch": 0.02771993070017325, "grad_norm": 0.1161278784275055, "learning_rate": 1e-06, "loss": 0.0203, "step": 297 }, { "clip_ratio/high_max": 0.002784185518976301, "clip_ratio/high_mean": 0.001252472629857948, "clip_ratio/low_mean": 0.0013023063074797392, "clip_ratio/low_min": 5.17031985509675e-05, "clip_ratio/region_mean": 0.0025547789555275813, "epoch": 0.02781326380017383, "grad_norm": 0.11731606721878052, "learning_rate": 1e-06, "loss": 0.0207, "step": 298 }, { "clip_ratio/high_max": 0.0021159514326427598, "clip_ratio/high_mean": 0.0010194711121584987, "clip_ratio/low_mean": 0.001165938207122963, "clip_ratio/low_min": 0.0001392071517329896, "clip_ratio/region_mean": 0.0021854093720321544, "epoch": 0.027906596900174416, "grad_norm": 0.42902833223342896, "learning_rate": 1e-06, "loss": 0.024, "step": 299 }, { "clip_ratio/high_max": 0.0025800122602959163, "clip_ratio/high_mean": 0.0010770422741188668, "clip_ratio/low_mean": 0.00122316302076797, "clip_ratio/low_min": 0.00013648294498125324, "clip_ratio/region_mean": 0.0023002052621450275, "epoch": 0.027999930000174998, "grad_norm": 0.10511124134063721, "learning_rate": 1e-06, "loss": 0.0001, "step": 300 }, { "clip_ratio/high_max": 0.002678452874533832, "clip_ratio/high_mean": 0.0011743729719455587, "clip_ratio/low_mean": 0.0009771113655006047, "clip_ratio/low_min": 7.334643396461615e-05, "clip_ratio/region_mean": 0.0021514844047487713, "epoch": 0.028093263100175583, "grad_norm": 0.11369849741458893, "learning_rate": 1e-06, "loss": 0.0011, "step": 301 }, { "clip_ratio/high_max": 0.002597427723230794, "clip_ratio/high_mean": 0.001141971641118289, "clip_ratio/low_mean": 0.0014551690692314878, "clip_ratio/low_min": 0.0002136166876880452, "clip_ratio/region_mean": 0.002597140693978872, "epoch": 0.028186596200176165, "grad_norm": 0.12733303010463715, "learning_rate": 1e-06, "loss": 0.0685, "step": 302 }, { "clip_ratio/high_max": 0.002422091525659198, "clip_ratio/high_mean": 0.0010322401703888318, "clip_ratio/low_mean": 0.0014132260002952535, "clip_ratio/low_min": 0.0001757511909090681, "clip_ratio/region_mean": 0.0024454661397612654, "epoch": 0.02827992930017675, "grad_norm": 0.11881496012210846, "learning_rate": 1e-06, "loss": 0.0089, "step": 303 }, { "clip_ratio/high_max": 0.0026955693174386397, "clip_ratio/high_mean": 0.0012299519730731845, "clip_ratio/low_mean": 0.001312518390477635, "clip_ratio/low_min": 8.885224451660179e-05, "clip_ratio/region_mean": 0.0025424703999306075, "epoch": 0.028373262400177332, "grad_norm": 0.11214599758386612, "learning_rate": 1e-06, "loss": 0.0207, "step": 304 }, { "clip_ratio/high_max": 0.0030520404980052263, "clip_ratio/high_mean": 0.0012664409950957634, "clip_ratio/low_mean": 0.0012687414782703854, "clip_ratio/low_min": 0.00025071760228456696, "clip_ratio/region_mean": 0.0025351825315738097, "epoch": 0.028466595500177917, "grad_norm": 0.12294237315654755, "learning_rate": 1e-06, "loss": 0.0428, "step": 305 }, { "clip_ratio/high_max": 0.002863376423192676, "clip_ratio/high_mean": 0.001243401042302139, "clip_ratio/low_mean": 0.0013304653075465467, "clip_ratio/low_min": 0.00011821213593066204, "clip_ratio/region_mean": 0.002573866324382834, "epoch": 0.0285599286001785, "grad_norm": 0.1343970000743866, "learning_rate": 1e-06, "loss": 0.002, "step": 306 }, { "clip_ratio/high_max": 0.0023996468080440536, "clip_ratio/high_mean": 0.001076956312317634, "clip_ratio/low_mean": 0.0012048578028043266, "clip_ratio/low_min": 0.00012952494034834672, "clip_ratio/region_mean": 0.0022818141733296216, "epoch": 0.028653261700179084, "grad_norm": 0.10756761580705643, "learning_rate": 1e-06, "loss": -0.0002, "step": 307 }, { "clip_ratio/high_max": 0.0022127933443698566, "clip_ratio/high_mean": 0.0010018030552600976, "clip_ratio/low_mean": 0.001273589994525537, "clip_ratio/low_min": 0.0001267386260224157, "clip_ratio/region_mean": 0.0022753930679755285, "epoch": 0.028746594800179666, "grad_norm": 0.11581108719110489, "learning_rate": 1e-06, "loss": 0.0579, "step": 308 }, { "clip_ratio/high_max": 0.002870077354600653, "clip_ratio/high_mean": 0.0011821799562312663, "clip_ratio/low_mean": 0.0015533137411694042, "clip_ratio/low_min": 0.00018565398204373196, "clip_ratio/region_mean": 0.002735493704676628, "epoch": 0.02883992790018025, "grad_norm": 0.11946620047092438, "learning_rate": 1e-06, "loss": 0.0303, "step": 309 }, { "clip_ratio/high_max": 0.0028245630965102464, "clip_ratio/high_mean": 0.0011898544362338725, "clip_ratio/low_mean": 0.001317115682468284, "clip_ratio/low_min": 0.00017805923789637745, "clip_ratio/region_mean": 0.0025069700641324744, "epoch": 0.028933261000180833, "grad_norm": 0.11811016499996185, "learning_rate": 1e-06, "loss": 0.007, "step": 310 }, { "clip_ratio/high_max": 0.002491500199539587, "clip_ratio/high_mean": 0.0011784347443608567, "clip_ratio/low_mean": 0.0012403953915054444, "clip_ratio/low_min": 0.00017528533408039948, "clip_ratio/region_mean": 0.0024188301686081104, "epoch": 0.029026594100181415, "grad_norm": 0.12050150334835052, "learning_rate": 1e-06, "loss": 0.0164, "step": 311 }, { "clip_ratio/high_max": 0.0028014828567393124, "clip_ratio/high_mean": 0.001221913451445289, "clip_ratio/low_mean": 0.0012070527009200305, "clip_ratio/low_min": 8.27523872430902e-05, "clip_ratio/region_mean": 0.002428966145089362, "epoch": 0.029119927200182, "grad_norm": 0.1260824203491211, "learning_rate": 1e-06, "loss": 0.0046, "step": 312 }, { "clip_ratio/high_max": 0.0024019197953748517, "clip_ratio/high_mean": 0.0010809826126205735, "clip_ratio/low_mean": 0.0012612868267751765, "clip_ratio/low_min": 0.0003237895361962728, "clip_ratio/region_mean": 0.0023422694866894744, "epoch": 0.02921326030018258, "grad_norm": 0.10993025451898575, "learning_rate": 1e-06, "loss": 0.0532, "step": 313 }, { "clip_ratio/high_max": 0.0026066493446705863, "clip_ratio/high_mean": 0.0011838960181194125, "clip_ratio/low_mean": 0.0013028596549702343, "clip_ratio/low_min": 0.00019492600586090703, "clip_ratio/region_mean": 0.0024867557294783182, "epoch": 0.029306593400183167, "grad_norm": 0.11327281594276428, "learning_rate": 1e-06, "loss": 0.0572, "step": 314 }, { "clip_ratio/high_max": 0.0023374292431981303, "clip_ratio/high_mean": 0.0010577563261904288, "clip_ratio/low_mean": 0.0014068042182771023, "clip_ratio/low_min": 0.0001055463344528107, "clip_ratio/region_mean": 0.002464560529915616, "epoch": 0.02939992650018375, "grad_norm": 0.1130322590470314, "learning_rate": 1e-06, "loss": 0.0323, "step": 315 }, { "clip_ratio/high_max": 0.002613388714962639, "clip_ratio/high_mean": 0.0012897571941721253, "clip_ratio/low_mean": 0.001050957234838279, "clip_ratio/low_min": 5.0486847612774e-05, "clip_ratio/region_mean": 0.0023407143817166798, "epoch": 0.029493259600184334, "grad_norm": 0.11195064336061478, "learning_rate": 1e-06, "loss": 0.0119, "step": 316 }, { "clip_ratio/high_max": 0.002533223043428734, "clip_ratio/high_mean": 0.001145669855759479, "clip_ratio/low_mean": 0.001398324515321292, "clip_ratio/low_min": 0.00014239074243960204, "clip_ratio/region_mean": 0.0025439944001846015, "epoch": 0.029586592700184915, "grad_norm": 0.12074705958366394, "learning_rate": 1e-06, "loss": 0.0316, "step": 317 }, { "clip_ratio/high_max": 0.00338101114903111, "clip_ratio/high_mean": 0.0012102191358280834, "clip_ratio/low_mean": 0.0014048545417608693, "clip_ratio/low_min": 0.0002666663258423796, "clip_ratio/region_mean": 0.0026150736666750163, "epoch": 0.0296799258001855, "grad_norm": 0.1311887502670288, "learning_rate": 1e-06, "loss": 0.0339, "step": 318 }, { "clip_ratio/high_max": 0.0028307714019319974, "clip_ratio/high_mean": 0.0012912246857013088, "clip_ratio/low_mean": 0.0012666746115428396, "clip_ratio/low_min": 0.00013638117343361955, "clip_ratio/region_mean": 0.0025578993081580848, "epoch": 0.029773258900186082, "grad_norm": 0.11552929133176804, "learning_rate": 1e-06, "loss": -0.0098, "step": 319 }, { "clip_ratio/high_max": 0.002431904333207058, "clip_ratio/high_mean": 0.0010434661162435077, "clip_ratio/low_mean": 0.0013269542941998225, "clip_ratio/low_min": 0.0001878975181170972, "clip_ratio/region_mean": 0.002370420392253436, "epoch": 0.029866592000186667, "grad_norm": 0.113045334815979, "learning_rate": 1e-06, "loss": 0.0229, "step": 320 }, { "clip_ratio/high_max": 0.002727728264289908, "clip_ratio/high_mean": 0.001258336560567841, "clip_ratio/low_mean": 0.001537319207272958, "clip_ratio/low_min": 9.894037793856114e-05, "clip_ratio/region_mean": 0.0027956557751167566, "epoch": 0.02995992510018725, "grad_norm": 0.12713514268398285, "learning_rate": 1e-06, "loss": 0.0232, "step": 321 }, { "clip_ratio/high_max": 0.0027570805468712933, "clip_ratio/high_mean": 0.001182385425636312, "clip_ratio/low_mean": 0.0013822237106069224, "clip_ratio/low_min": 0.00010947411919914884, "clip_ratio/region_mean": 0.002564609138062224, "epoch": 0.030053258200187834, "grad_norm": 0.11906226724386215, "learning_rate": 1e-06, "loss": 0.0276, "step": 322 }, { "clip_ratio/high_max": 0.00273782334261341, "clip_ratio/high_mean": 0.0012277323185116984, "clip_ratio/low_mean": 0.0016281834068649914, "clip_ratio/low_min": 0.0002921199702541344, "clip_ratio/region_mean": 0.0028559158236021176, "epoch": 0.030146591300188416, "grad_norm": 0.1219710111618042, "learning_rate": 1e-06, "loss": 0.0763, "step": 323 }, { "clip_ratio/high_max": 0.0026153652906941716, "clip_ratio/high_mean": 0.0011733440314856125, "clip_ratio/low_mean": 0.001203061947308015, "clip_ratio/low_min": 0.0001541512829135172, "clip_ratio/region_mean": 0.0023764060097164474, "epoch": 0.030239924400188998, "grad_norm": 0.11505386233329773, "learning_rate": 1e-06, "loss": 0.0098, "step": 324 }, { "clip_ratio/high_max": 0.0029785673395963386, "clip_ratio/high_mean": 0.0013684470468433574, "clip_ratio/low_mean": 0.0013609007364721037, "clip_ratio/low_min": 0.0001766563109413255, "clip_ratio/region_mean": 0.0027293477542116307, "epoch": 0.030333257500189583, "grad_norm": 0.11739931255578995, "learning_rate": 1e-06, "loss": -0.0273, "step": 325 }, { "clip_ratio/high_max": 0.0028727469325531274, "clip_ratio/high_mean": 0.0012607965181814507, "clip_ratio/low_mean": 0.0012595268235600088, "clip_ratio/low_min": 0.00010610255776555277, "clip_ratio/region_mean": 0.00252032330899965, "epoch": 0.030426590600190165, "grad_norm": 0.12706126272678375, "learning_rate": 1e-06, "loss": -0.0123, "step": 326 }, { "clip_ratio/high_max": 0.002294195153808687, "clip_ratio/high_mean": 0.0010198140535067068, "clip_ratio/low_mean": 0.0013177910768717993, "clip_ratio/low_min": 6.472573386417935e-05, "clip_ratio/region_mean": 0.0023376051103696227, "epoch": 0.03051992370019075, "grad_norm": 0.1108463779091835, "learning_rate": 1e-06, "loss": 0.0716, "step": 327 }, { "clip_ratio/high_max": 0.0022406296484405175, "clip_ratio/high_mean": 0.0010030687444668729, "clip_ratio/low_mean": 0.0012518016919784714, "clip_ratio/low_min": 0.0002432438222967903, "clip_ratio/region_mean": 0.0022548704109794926, "epoch": 0.030613256800191332, "grad_norm": 0.11512810736894608, "learning_rate": 1e-06, "loss": 0.031, "step": 328 }, { "clip_ratio/high_max": 0.002654530129802879, "clip_ratio/high_mean": 0.0012471846494008787, "clip_ratio/low_mean": 0.001384411683829967, "clip_ratio/low_min": 0.00021466767975653056, "clip_ratio/region_mean": 0.002631596384162549, "epoch": 0.030706589900191917, "grad_norm": 0.1292494535446167, "learning_rate": 1e-06, "loss": -0.0139, "step": 329 }, { "clip_ratio/high_max": 0.002669588844582904, "clip_ratio/high_mean": 0.001118953663535649, "clip_ratio/low_mean": 0.0012583058160089422, "clip_ratio/low_min": 0.00015707740931247827, "clip_ratio/region_mean": 0.0023772594722686335, "epoch": 0.0307999230001925, "grad_norm": 0.11671560257673264, "learning_rate": 1e-06, "loss": 0.0397, "step": 330 }, { "clip_ratio/high_max": 0.002987729363667313, "clip_ratio/high_mean": 0.0013143653159204405, "clip_ratio/low_mean": 0.0014170816175465006, "clip_ratio/low_min": 0.00032416541489510564, "clip_ratio/region_mean": 0.002731446824327577, "epoch": 0.030893256100193084, "grad_norm": 0.12937036156654358, "learning_rate": 1e-06, "loss": 0.0041, "step": 331 }, { "clip_ratio/high_max": 0.00297994892753195, "clip_ratio/high_mean": 0.0013374622758419719, "clip_ratio/low_mean": 0.0013030631016590632, "clip_ratio/low_min": 7.35727344363113e-05, "clip_ratio/region_mean": 0.002640525322931353, "epoch": 0.030986589200193666, "grad_norm": 0.12331847846508026, "learning_rate": 1e-06, "loss": -0.0112, "step": 332 }, { "clip_ratio/high_max": 0.00271753131528385, "clip_ratio/high_mean": 0.0012775933973898645, "clip_ratio/low_mean": 0.001287443723413162, "clip_ratio/low_min": 0.00021289817414071877, "clip_ratio/region_mean": 0.0025650370807852596, "epoch": 0.03107992230019425, "grad_norm": 0.1075827106833458, "learning_rate": 1e-06, "loss": 0.0109, "step": 333 }, { "clip_ratio/high_max": 0.002579079162387643, "clip_ratio/high_mean": 0.0012311333302932326, "clip_ratio/low_mean": 0.0015370732035080437, "clip_ratio/low_min": 0.00031929068063618615, "clip_ratio/region_mean": 0.0027682064755936153, "epoch": 0.031173255400194833, "grad_norm": 0.11311246454715729, "learning_rate": 1e-06, "loss": 0.0352, "step": 334 }, { "clip_ratio/high_max": 0.0029205515020294115, "clip_ratio/high_mean": 0.0012675274047069252, "clip_ratio/low_mean": 0.0012723055770038627, "clip_ratio/low_min": 0.00019051301933359355, "clip_ratio/region_mean": 0.0025398330035386607, "epoch": 0.03126658850019542, "grad_norm": 0.11139726638793945, "learning_rate": 1e-06, "loss": 0.0009, "step": 335 }, { "clip_ratio/high_max": 0.002710441993258428, "clip_ratio/high_mean": 0.0011613564893195871, "clip_ratio/low_mean": 0.0012366141090751626, "clip_ratio/low_min": 7.743687365291407e-05, "clip_ratio/region_mean": 0.002397970631136559, "epoch": 0.031359921600196, "grad_norm": 0.1116127148270607, "learning_rate": 1e-06, "loss": -0.0108, "step": 336 }, { "clip_ratio/high_max": 0.0024759200096013956, "clip_ratio/high_mean": 0.0012100106014258927, "clip_ratio/low_mean": 0.0014488231245195493, "clip_ratio/low_min": 0.0002433279987599235, "clip_ratio/region_mean": 0.002658833698660601, "epoch": 0.03145325470019658, "grad_norm": 0.125503808259964, "learning_rate": 1e-06, "loss": 0.021, "step": 337 }, { "clip_ratio/high_max": 0.002818977656716015, "clip_ratio/high_mean": 0.001187575497169746, "clip_ratio/low_mean": 0.0014269797175074928, "clip_ratio/low_min": 0.00014719645605509868, "clip_ratio/region_mean": 0.002614555196487345, "epoch": 0.031546587800197166, "grad_norm": 0.12357683479785919, "learning_rate": 1e-06, "loss": 0.0522, "step": 338 }, { "clip_ratio/high_max": 0.0024323719771928154, "clip_ratio/high_mean": 0.0011744552193704294, "clip_ratio/low_mean": 0.0015022774787212256, "clip_ratio/low_min": 5.556790347327478e-05, "clip_ratio/region_mean": 0.0026767327217385173, "epoch": 0.03163992090019775, "grad_norm": 0.11504726856946945, "learning_rate": 1e-06, "loss": 0.0384, "step": 339 }, { "clip_ratio/high_max": 0.0028221156244399026, "clip_ratio/high_mean": 0.0013185626157792285, "clip_ratio/low_mean": 0.0014383208654180635, "clip_ratio/low_min": 0.0001980749329959508, "clip_ratio/region_mean": 0.002756883463007398, "epoch": 0.03173325400019833, "grad_norm": 0.13010273873806, "learning_rate": 1e-06, "loss": 0.0563, "step": 340 }, { "clip_ratio/high_max": 0.0024896442992030643, "clip_ratio/high_mean": 0.001195636734337313, "clip_ratio/low_mean": 0.0012817933020414785, "clip_ratio/low_min": 0.00014836681566521293, "clip_ratio/region_mean": 0.0024774300909484737, "epoch": 0.031826587100198915, "grad_norm": 0.11577541381120682, "learning_rate": 1e-06, "loss": -0.0049, "step": 341 }, { "clip_ratio/high_max": 0.0027691277718986385, "clip_ratio/high_mean": 0.0012069160329701845, "clip_ratio/low_mean": 0.0013301407307153568, "clip_ratio/low_min": 9.675918954599183e-05, "clip_ratio/region_mean": 0.002537056752771605, "epoch": 0.0319199202001995, "grad_norm": 0.11978305876255035, "learning_rate": 1e-06, "loss": 0.0307, "step": 342 }, { "clip_ratio/high_max": 0.002609323670185404, "clip_ratio/high_mean": 0.0011365679511072813, "clip_ratio/low_mean": 0.0014654387232440058, "clip_ratio/low_min": 0.0002511220859560126, "clip_ratio/region_mean": 0.002602006701636128, "epoch": 0.032013253300200085, "grad_norm": 0.13147114217281342, "learning_rate": 1e-06, "loss": 0.0728, "step": 343 }, { "clip_ratio/high_max": 0.002931735318270512, "clip_ratio/high_mean": 0.0012131395778851584, "clip_ratio/low_mean": 0.0012386130656523164, "clip_ratio/low_min": 0.0002098496979670017, "clip_ratio/region_mean": 0.0024517526835552417, "epoch": 0.032106586400200664, "grad_norm": 0.12044328451156616, "learning_rate": 1e-06, "loss": 0.0324, "step": 344 }, { "clip_ratio/high_max": 0.002839184606273193, "clip_ratio/high_mean": 0.001230621859576786, "clip_ratio/low_mean": 0.0013291972973092925, "clip_ratio/low_min": 0.00017719788775139023, "clip_ratio/region_mean": 0.002559819149610121, "epoch": 0.03219991950020125, "grad_norm": 0.11433827131986618, "learning_rate": 1e-06, "loss": 0.0281, "step": 345 }, { "clip_ratio/high_max": 0.0029480933299055323, "clip_ratio/high_mean": 0.0012881237562396564, "clip_ratio/low_mean": 0.0014575713721569628, "clip_ratio/low_min": 0.00031186802607408026, "clip_ratio/region_mean": 0.002745695164776407, "epoch": 0.032293252600201834, "grad_norm": 0.12129674851894379, "learning_rate": 1e-06, "loss": 0.0456, "step": 346 }, { "clip_ratio/high_max": 0.0027298552959109657, "clip_ratio/high_mean": 0.001182255509775132, "clip_ratio/low_mean": 0.0014558730908902362, "clip_ratio/low_min": 0.00019792932380369166, "clip_ratio/region_mean": 0.002638128622493241, "epoch": 0.03238658570020242, "grad_norm": 0.11985555291175842, "learning_rate": 1e-06, "loss": 0.0352, "step": 347 }, { "clip_ratio/high_max": 0.0026596702955430374, "clip_ratio/high_mean": 0.001141934440965997, "clip_ratio/low_mean": 0.0012547573078336427, "clip_ratio/low_min": 0.00014345739509735722, "clip_ratio/region_mean": 0.0023966917287907563, "epoch": 0.032479918800203, "grad_norm": 0.11207590997219086, "learning_rate": 1e-06, "loss": -0.0092, "step": 348 }, { "clip_ratio/high_max": 0.002559121923695784, "clip_ratio/high_mean": 0.0012003442679997534, "clip_ratio/low_mean": 0.0014691905016661622, "clip_ratio/low_min": 9.125880569627043e-05, "clip_ratio/region_mean": 0.002669534776941873, "epoch": 0.03257325190020358, "grad_norm": 3.4749622344970703, "learning_rate": 1e-06, "loss": 0.0241, "step": 349 }, { "clip_ratio/high_max": 0.002477469613950234, "clip_ratio/high_mean": 0.0011365012142050546, "clip_ratio/low_mean": 0.0013982561358716339, "clip_ratio/low_min": 0.00016291175415972248, "clip_ratio/region_mean": 0.002534757317334879, "epoch": 0.03266658500020417, "grad_norm": 0.10832560807466507, "learning_rate": 1e-06, "loss": 0.0101, "step": 350 }, { "clip_ratio/high_max": 0.0024960657028714195, "clip_ratio/high_mean": 0.001266965857212199, "clip_ratio/low_mean": 0.0016646083749947138, "clip_ratio/low_min": 0.00031376024253404466, "clip_ratio/region_mean": 0.0029315741994651034, "epoch": 0.032759918100204746, "grad_norm": 0.12081333994865417, "learning_rate": 1e-06, "loss": 0.0491, "step": 351 }, { "clip_ratio/high_max": 0.002582907116448041, "clip_ratio/high_mean": 0.001202720268338453, "clip_ratio/low_mean": 0.0014803973208472598, "clip_ratio/low_min": 0.00019903177508240333, "clip_ratio/region_mean": 0.0026831175928236917, "epoch": 0.03285325120020533, "grad_norm": 0.11948749423027039, "learning_rate": 1e-06, "loss": 0.0409, "step": 352 }, { "clip_ratio/high_max": 0.0030550208903150633, "clip_ratio/high_mean": 0.0012226207763887942, "clip_ratio/low_mean": 0.0015107838371477555, "clip_ratio/low_min": 0.00012934763435623609, "clip_ratio/region_mean": 0.0027334046171745285, "epoch": 0.03294658430020592, "grad_norm": 0.1121947318315506, "learning_rate": 1e-06, "loss": 0.0079, "step": 353 }, { "clip_ratio/high_max": 0.0023161769968282897, "clip_ratio/high_mean": 0.001042079231410753, "clip_ratio/low_mean": 0.0014469076922978275, "clip_ratio/low_min": 0.00015345194333349355, "clip_ratio/region_mean": 0.002488986909156665, "epoch": 0.0330399174002065, "grad_norm": 0.11080152541399002, "learning_rate": 1e-06, "loss": 0.0186, "step": 354 }, { "clip_ratio/high_max": 0.002782282972475514, "clip_ratio/high_mean": 0.0011958974719163962, "clip_ratio/low_mean": 0.001502432132838294, "clip_ratio/low_min": 0.0002410057913948549, "clip_ratio/region_mean": 0.0026983295974787325, "epoch": 0.03313325050020708, "grad_norm": 0.10277802497148514, "learning_rate": 1e-06, "loss": 0.0455, "step": 355 }, { "clip_ratio/high_max": 0.003092012084380258, "clip_ratio/high_mean": 0.0013035802330705337, "clip_ratio/low_mean": 0.001442345317627769, "clip_ratio/low_min": 9.913241046888288e-05, "clip_ratio/region_mean": 0.0027459256525617093, "epoch": 0.033226583600207665, "grad_norm": 0.2214948683977127, "learning_rate": 1e-06, "loss": 0.0189, "step": 356 }, { "clip_ratio/high_max": 0.0026209849966107868, "clip_ratio/high_mean": 0.0011954216133744922, "clip_ratio/low_mean": 0.001534127106424421, "clip_ratio/low_min": 0.00040555588111601537, "clip_ratio/region_mean": 0.002729548723436892, "epoch": 0.03331991670020825, "grad_norm": 0.11518749594688416, "learning_rate": 1e-06, "loss": 0.0176, "step": 357 }, { "clip_ratio/high_max": 0.0024458394764224067, "clip_ratio/high_mean": 0.0011301611411909107, "clip_ratio/low_mean": 0.0013621316575154196, "clip_ratio/low_min": 0.00017250300606974633, "clip_ratio/region_mean": 0.0024922927550505847, "epoch": 0.033413249800208836, "grad_norm": 0.11775501817464828, "learning_rate": 1e-06, "loss": 0.0644, "step": 358 }, { "clip_ratio/high_max": 0.0024289016146212816, "clip_ratio/high_mean": 0.0011500306172820274, "clip_ratio/low_mean": 0.0012023261660942808, "clip_ratio/low_min": 0.00015564248133159708, "clip_ratio/region_mean": 0.002352356787014287, "epoch": 0.033506582900209414, "grad_norm": 0.11520274728536606, "learning_rate": 1e-06, "loss": 0.0143, "step": 359 }, { "clip_ratio/high_max": 0.002975747687742114, "clip_ratio/high_mean": 0.0013569052534876391, "clip_ratio/low_mean": 0.0012018972192890942, "clip_ratio/low_min": 0.0001467452475480968, "clip_ratio/region_mean": 0.0025588025091565214, "epoch": 0.03359991600021, "grad_norm": 0.11080080270767212, "learning_rate": 1e-06, "loss": -0.0184, "step": 360 }, { "clip_ratio/high_max": 0.0031076144878170453, "clip_ratio/high_mean": 0.0014688008268421981, "clip_ratio/low_mean": 0.001412544552295003, "clip_ratio/low_min": 0.00015802310281287646, "clip_ratio/region_mean": 0.002881345404603053, "epoch": 0.033693249100210584, "grad_norm": 0.12494487315416336, "learning_rate": 1e-06, "loss": -0.0005, "step": 361 }, { "clip_ratio/high_max": 0.003029650433745701, "clip_ratio/high_mean": 0.0013258152503112797, "clip_ratio/low_mean": 0.0013449108737404458, "clip_ratio/low_min": 6.173439942358527e-05, "clip_ratio/region_mean": 0.0026707261567935348, "epoch": 0.03378658220021117, "grad_norm": 0.1218714639544487, "learning_rate": 1e-06, "loss": 0.0138, "step": 362 }, { "clip_ratio/high_max": 0.003296908544143662, "clip_ratio/high_mean": 0.0014315507396531757, "clip_ratio/low_mean": 0.0014824158424744383, "clip_ratio/low_min": 0.00027321754532749765, "clip_ratio/region_mean": 0.0029139665493858047, "epoch": 0.03387991530021175, "grad_norm": 0.11771242320537567, "learning_rate": 1e-06, "loss": 0.0399, "step": 363 }, { "clip_ratio/high_max": 0.002922888335888274, "clip_ratio/high_mean": 0.0013917280466557713, "clip_ratio/low_mean": 0.0014605308060708921, "clip_ratio/low_min": 0.0001180035833385773, "clip_ratio/region_mean": 0.002852258854545653, "epoch": 0.03397324840021233, "grad_norm": 0.11393259465694427, "learning_rate": 1e-06, "loss": 0.0135, "step": 364 }, { "clip_ratio/high_max": 0.0030631140180048533, "clip_ratio/high_mean": 0.0012989162241865415, "clip_ratio/low_mean": 0.0015420503877976444, "clip_ratio/low_min": 0.0002679603967408184, "clip_ratio/region_mean": 0.002840966553776525, "epoch": 0.03406658150021292, "grad_norm": 0.12158150225877762, "learning_rate": 1e-06, "loss": 0.0673, "step": 365 }, { "clip_ratio/high_max": 0.002992504494613968, "clip_ratio/high_mean": 0.001317954454862047, "clip_ratio/low_mean": 0.0016970324140856974, "clip_ratio/low_min": 0.0003657548495539231, "clip_ratio/region_mean": 0.0030149868689477444, "epoch": 0.034159914600213497, "grad_norm": 0.12091277539730072, "learning_rate": 1e-06, "loss": 0.0342, "step": 366 }, { "clip_ratio/high_max": 0.0026127718956558965, "clip_ratio/high_mean": 0.0012259299801371526, "clip_ratio/low_mean": 0.001328467573330272, "clip_ratio/low_min": 0.00014471973736362997, "clip_ratio/region_mean": 0.002554397564381361, "epoch": 0.03425324770021408, "grad_norm": 0.12249797582626343, "learning_rate": 1e-06, "loss": 0.0239, "step": 367 }, { "clip_ratio/high_max": 0.003144436050206423, "clip_ratio/high_mean": 0.001477711739426013, "clip_ratio/low_mean": 0.001165086836408591, "clip_ratio/low_min": 9.815675184654538e-05, "clip_ratio/region_mean": 0.0026427985285408795, "epoch": 0.03434658080021467, "grad_norm": 0.11737898737192154, "learning_rate": 1e-06, "loss": -0.0186, "step": 368 }, { "clip_ratio/high_max": 0.003237921053369064, "clip_ratio/high_mean": 0.0013613178925879765, "clip_ratio/low_mean": 0.0014945425282348879, "clip_ratio/low_min": 0.0002728998360908008, "clip_ratio/region_mean": 0.002855860446288716, "epoch": 0.03443991390021525, "grad_norm": 0.12224118411540985, "learning_rate": 1e-06, "loss": 0.0012, "step": 369 }, { "clip_ratio/high_max": 0.002930335816927254, "clip_ratio/high_mean": 0.0011756890198739711, "clip_ratio/low_mean": 0.0013318125384103041, "clip_ratio/low_min": 0.0002064401523966808, "clip_ratio/region_mean": 0.0025075016237678938, "epoch": 0.03453324700021583, "grad_norm": 0.11294194310903549, "learning_rate": 1e-06, "loss": 0.0223, "step": 370 }, { "clip_ratio/high_max": 0.0030744605683139525, "clip_ratio/high_mean": 0.0013768448843620718, "clip_ratio/low_mean": 0.0013118575898261042, "clip_ratio/low_min": 0.0001530354256829014, "clip_ratio/region_mean": 0.0026887024578172714, "epoch": 0.034626580100216416, "grad_norm": 0.1312245875597, "learning_rate": 1e-06, "loss": -0.0347, "step": 371 }, { "clip_ratio/high_max": 0.002997469222464133, "clip_ratio/high_mean": 0.0013667259299836587, "clip_ratio/low_mean": 0.0013200467146816663, "clip_ratio/low_min": 0.00016788743232609704, "clip_ratio/region_mean": 0.002686772611923516, "epoch": 0.034719913200217, "grad_norm": 0.11536245793104172, "learning_rate": 1e-06, "loss": -0.0226, "step": 372 }, { "clip_ratio/high_max": 0.0030330172085086815, "clip_ratio/high_mean": 0.0012987891968805343, "clip_ratio/low_mean": 0.0016039513066061772, "clip_ratio/low_min": 0.00014207320418790914, "clip_ratio/region_mean": 0.0029027405616943724, "epoch": 0.034813246300217586, "grad_norm": 0.11989035457372665, "learning_rate": 1e-06, "loss": 0.0253, "step": 373 }, { "clip_ratio/high_max": 0.003129093471216038, "clip_ratio/high_mean": 0.0013848109992977697, "clip_ratio/low_mean": 0.001588617444213014, "clip_ratio/low_min": 0.00021316480160749052, "clip_ratio/region_mean": 0.002973428403493017, "epoch": 0.034906579400218164, "grad_norm": 0.11638612300157547, "learning_rate": 1e-06, "loss": 0.0296, "step": 374 }, { "clip_ratio/high_max": 0.0028542610234580934, "clip_ratio/high_mean": 0.0012471241752791684, "clip_ratio/low_mean": 0.0014790143723075744, "clip_ratio/low_min": 0.00019667933338496368, "clip_ratio/region_mean": 0.002726138540310785, "epoch": 0.03499991250021875, "grad_norm": 0.1196187362074852, "learning_rate": 1e-06, "loss": 0.0031, "step": 375 }, { "clip_ratio/high_max": 0.002658857723872643, "clip_ratio/high_mean": 0.0011874367119162343, "clip_ratio/low_mean": 0.0015645574349036906, "clip_ratio/low_min": 0.00030970828993304167, "clip_ratio/region_mean": 0.0027519941213540733, "epoch": 0.035093245600219335, "grad_norm": 0.11873576045036316, "learning_rate": 1e-06, "loss": 0.0442, "step": 376 }, { "clip_ratio/high_max": 0.002949292211269494, "clip_ratio/high_mean": 0.0013498104235623032, "clip_ratio/low_mean": 0.0014495048362732632, "clip_ratio/low_min": 0.00013135279459675075, "clip_ratio/region_mean": 0.00279931521799881, "epoch": 0.03518657870021991, "grad_norm": 0.11756071448326111, "learning_rate": 1e-06, "loss": 0.0334, "step": 377 }, { "clip_ratio/high_max": 0.0032875392425921746, "clip_ratio/high_mean": 0.0013379990232351702, "clip_ratio/low_mean": 0.001447006499802228, "clip_ratio/low_min": 0.00026105966935574543, "clip_ratio/region_mean": 0.002785005490295589, "epoch": 0.0352799118002205, "grad_norm": 0.11876508593559265, "learning_rate": 1e-06, "loss": -0.0003, "step": 378 }, { "clip_ratio/high_max": 0.0033529016509419307, "clip_ratio/high_mean": 0.0015341117104981095, "clip_ratio/low_mean": 0.001427228286047466, "clip_ratio/low_min": 0.0002483739290255471, "clip_ratio/region_mean": 0.002961340025649406, "epoch": 0.03537324490022108, "grad_norm": 0.11736912280321121, "learning_rate": 1e-06, "loss": -0.0237, "step": 379 }, { "clip_ratio/high_max": 0.0025889503958751447, "clip_ratio/high_mean": 0.0009806359439608059, "clip_ratio/low_mean": 0.0014777261894778349, "clip_ratio/low_min": 0.00016089421842480078, "clip_ratio/region_mean": 0.0024583620761404745, "epoch": 0.03546657800022167, "grad_norm": 0.12600675225257874, "learning_rate": 1e-06, "loss": 0.0366, "step": 380 }, { "clip_ratio/high_max": 0.002983310798299499, "clip_ratio/high_mean": 0.0012190710367576685, "clip_ratio/low_mean": 0.0013812572688038927, "clip_ratio/low_min": 0.00022087665092840325, "clip_ratio/region_mean": 0.002600328291009646, "epoch": 0.03555991110022225, "grad_norm": 0.12188021093606949, "learning_rate": 1e-06, "loss": 0.0033, "step": 381 }, { "clip_ratio/high_max": 0.0027397128578741103, "clip_ratio/high_mean": 0.0011885113854077645, "clip_ratio/low_mean": 0.0017685258280835114, "clip_ratio/low_min": 0.00022877414630784187, "clip_ratio/region_mean": 0.0029570372353191487, "epoch": 0.03565324420022283, "grad_norm": 0.11412032693624496, "learning_rate": 1e-06, "loss": 0.0555, "step": 382 }, { "clip_ratio/high_max": 0.003093159233685583, "clip_ratio/high_mean": 0.0013495236344169825, "clip_ratio/low_mean": 0.0014810246939305216, "clip_ratio/low_min": 0.00021714700960728806, "clip_ratio/region_mean": 0.0028305482992436737, "epoch": 0.03574657730022342, "grad_norm": 0.12263639271259308, "learning_rate": 1e-06, "loss": 0.0372, "step": 383 }, { "clip_ratio/high_max": 0.002536262647481635, "clip_ratio/high_mean": 0.0011663670593407005, "clip_ratio/low_mean": 0.001471982501243474, "clip_ratio/low_min": 0.00014538026334776077, "clip_ratio/region_mean": 0.0026383495569461957, "epoch": 0.035839910400224, "grad_norm": 0.11592624336481094, "learning_rate": 1e-06, "loss": 0.0076, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012956891741071397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 605.6300659179688, "completions/mean_terminated_length": 559.81201171875, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.03593324350022458, "grad_norm": 0.12711025774478912, "learning_rate": 1e-06, "loss": 0.0677, "num_tokens": 323251285.0, "reward": 0.567391574382782, "reward_std": 0.2123998999595642, "rewards/simpleverify_reward/mean": 0.5673915147781372, "rewards/simpleverify_reward/std": 0.4954396188259125, "step": 385 }, { "clip_ratio/high_max": 0.0021397297496150713, "clip_ratio/high_mean": 0.0008496433792970493, "clip_ratio/low_mean": 0.0006479338917415589, "clip_ratio/low_min": 3.825265412160661e-05, "clip_ratio/region_mean": 0.0014975772573961876, "epoch": 0.036026576600225166, "grad_norm": 0.12022633105516434, "learning_rate": 1e-06, "loss": 0.0407, "step": 386 }, { "clip_ratio/high_max": 0.00232318328198744, "clip_ratio/high_mean": 0.0009690656061138725, "clip_ratio/low_mean": 0.000670978288326296, "clip_ratio/low_min": 6.840613878011936e-05, "clip_ratio/region_mean": 0.0016400438980781473, "epoch": 0.03611990970022575, "grad_norm": 0.11227843910455704, "learning_rate": 1e-06, "loss": 0.0522, "step": 387 }, { "clip_ratio/high_max": 0.0020417370687937364, "clip_ratio/high_mean": 0.0008666624235047493, "clip_ratio/low_mean": 0.000664237180899363, "clip_ratio/low_min": 2.379434499744093e-05, "clip_ratio/region_mean": 0.001530899571662303, "epoch": 0.03621324280022633, "grad_norm": 0.11939997225999832, "learning_rate": 1e-06, "loss": 0.0485, "step": 388 }, { "clip_ratio/high_max": 0.0027182477424503304, "clip_ratio/high_mean": 0.0010685947145248065, "clip_ratio/low_mean": 0.0006693238592561102, "clip_ratio/low_min": 6.567220771103166e-05, "clip_ratio/region_mean": 0.0017379185737809166, "epoch": 0.036306575900226914, "grad_norm": 0.11462533473968506, "learning_rate": 1e-06, "loss": -0.0355, "step": 389 }, { "clip_ratio/high_max": 0.002425007405690849, "clip_ratio/high_mean": 0.0010334346188756172, "clip_ratio/low_mean": 0.0007138050896173809, "clip_ratio/low_min": 8.032506775634829e-05, "clip_ratio/region_mean": 0.0017472397157689556, "epoch": 0.0363999090002275, "grad_norm": 0.12447582930326462, "learning_rate": 1e-06, "loss": -0.0093, "step": 390 }, { "clip_ratio/high_max": 0.0025341107611893676, "clip_ratio/high_mean": 0.0011683505472319666, "clip_ratio/low_mean": 0.0007385787139355671, "clip_ratio/low_min": 3.290339736850001e-05, "clip_ratio/region_mean": 0.0019069292611675337, "epoch": 0.036493242100228085, "grad_norm": 0.11291956901550293, "learning_rate": 1e-06, "loss": -0.046, "step": 391 }, { "clip_ratio/high_max": 0.0020968693861505017, "clip_ratio/high_mean": 0.0009629784399294294, "clip_ratio/low_mean": 0.0009910686094372068, "clip_ratio/low_min": 6.0827079323644284e-05, "clip_ratio/region_mean": 0.0019540470675565302, "epoch": 0.03658657520022866, "grad_norm": 0.11530353873968124, "learning_rate": 1e-06, "loss": 0.0613, "step": 392 }, { "clip_ratio/high_max": 0.002384906867519021, "clip_ratio/high_mean": 0.0010603293012536597, "clip_ratio/low_mean": 0.0009049226209754124, "clip_ratio/low_min": 0.00011611211812123656, "clip_ratio/region_mean": 0.001965251882211305, "epoch": 0.03667990830022925, "grad_norm": 0.1233193576335907, "learning_rate": 1e-06, "loss": -0.003, "step": 393 }, { "clip_ratio/high_max": 0.002504220545233693, "clip_ratio/high_mean": 0.0010534356842981651, "clip_ratio/low_mean": 0.0008702893701411085, "clip_ratio/low_min": 9.4570267265226e-05, "clip_ratio/region_mean": 0.0019237250162404962, "epoch": 0.036773241400229834, "grad_norm": 0.11533639580011368, "learning_rate": 1e-06, "loss": 0.0375, "step": 394 }, { "clip_ratio/high_max": 0.002351952418393921, "clip_ratio/high_mean": 0.0009425225825907546, "clip_ratio/low_mean": 0.0011128354526590556, "clip_ratio/low_min": 0.0001173481878140592, "clip_ratio/region_mean": 0.002055358068901114, "epoch": 0.03686657450023042, "grad_norm": 0.1315789818763733, "learning_rate": 1e-06, "loss": 0.0916, "step": 395 }, { "clip_ratio/high_max": 0.002362157429161016, "clip_ratio/high_mean": 0.000931930241677037, "clip_ratio/low_mean": 0.0009760237371665426, "clip_ratio/low_min": 8.948628601501696e-05, "clip_ratio/region_mean": 0.0019079539415542968, "epoch": 0.036959907600231, "grad_norm": 0.1327863186597824, "learning_rate": 1e-06, "loss": 0.0321, "step": 396 }, { "clip_ratio/high_max": 0.002023422217462212, "clip_ratio/high_mean": 0.0009445934592804406, "clip_ratio/low_mean": 0.0009359381001559086, "clip_ratio/low_min": 9.366624090034747e-05, "clip_ratio/region_mean": 0.0018805315485224128, "epoch": 0.03705324070023158, "grad_norm": 0.12058063596487045, "learning_rate": 1e-06, "loss": 0.0116, "step": 397 }, { "clip_ratio/high_max": 0.002045342462224653, "clip_ratio/high_mean": 0.0009971644794859458, "clip_ratio/low_mean": 0.0008318648015119834, "clip_ratio/low_min": 6.286709049163619e-05, "clip_ratio/region_mean": 0.001829029293730855, "epoch": 0.03714657380023217, "grad_norm": 0.12755998969078064, "learning_rate": 1e-06, "loss": 0.0006, "step": 398 }, { "clip_ratio/high_max": 0.0024093371248454787, "clip_ratio/high_mean": 0.0010071235046780203, "clip_ratio/low_mean": 0.0009501873009867268, "clip_ratio/low_min": 0.00013041961392445955, "clip_ratio/region_mean": 0.0019573108220356517, "epoch": 0.03723990690023275, "grad_norm": 0.13397479057312012, "learning_rate": 1e-06, "loss": 0.0351, "step": 399 }, { "clip_ratio/high_max": 0.0019218649940739851, "clip_ratio/high_mean": 0.0008739265640542726, "clip_ratio/low_mean": 0.00089426988051855, "clip_ratio/low_min": 6.989745543251047e-05, "clip_ratio/region_mean": 0.0017681964382063597, "epoch": 0.03733324000023333, "grad_norm": 0.1116487979888916, "learning_rate": 1e-06, "loss": -0.0038, "step": 400 }, { "clip_ratio/high_max": 0.0022813589894212782, "clip_ratio/high_mean": 0.0009934133413480595, "clip_ratio/low_mean": 0.0011305456682748627, "clip_ratio/low_min": 8.463089579890948e-05, "clip_ratio/region_mean": 0.0021239590641926043, "epoch": 0.037426573100233916, "grad_norm": 0.12167389690876007, "learning_rate": 1e-06, "loss": 0.0134, "step": 401 }, { "clip_ratio/high_max": 0.0027896390602109022, "clip_ratio/high_mean": 0.0011046835752495099, "clip_ratio/low_mean": 0.0009482977329753339, "clip_ratio/low_min": 8.5561519881594e-05, "clip_ratio/region_mean": 0.002052981326414738, "epoch": 0.0375199062002345, "grad_norm": 0.11247431486845016, "learning_rate": 1e-06, "loss": 0.0117, "step": 402 }, { "clip_ratio/high_max": 0.0024053056913544424, "clip_ratio/high_mean": 0.0010184167076658923, "clip_ratio/low_mean": 0.0011096590424131136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002128075764630921, "epoch": 0.03761323930023508, "grad_norm": 0.1222907081246376, "learning_rate": 1e-06, "loss": 0.0198, "step": 403 }, { "clip_ratio/high_max": 0.002224021009169519, "clip_ratio/high_mean": 0.0010099315622937866, "clip_ratio/low_mean": 0.0010289443853253033, "clip_ratio/low_min": 0.00012426178545865696, "clip_ratio/region_mean": 0.0020388758966873866, "epoch": 0.037706572400235665, "grad_norm": 0.11835585534572601, "learning_rate": 1e-06, "loss": 0.0092, "step": 404 }, { "clip_ratio/high_max": 0.002185758152336348, "clip_ratio/high_mean": 0.0010172069050895516, "clip_ratio/low_mean": 0.0010902521971729584, "clip_ratio/low_min": 0.00017644204490352422, "clip_ratio/region_mean": 0.002107459142280277, "epoch": 0.03779990550023625, "grad_norm": 0.11552343517541885, "learning_rate": 1e-06, "loss": -0.002, "step": 405 }, { "clip_ratio/high_max": 0.0030390612155315466, "clip_ratio/high_mean": 0.001208569279697258, "clip_ratio/low_mean": 0.001038592014083406, "clip_ratio/low_min": 0.00013717379442823585, "clip_ratio/region_mean": 0.0022471613265224732, "epoch": 0.037893238600236835, "grad_norm": 0.11450153589248657, "learning_rate": 1e-06, "loss": -0.0371, "step": 406 }, { "clip_ratio/high_max": 0.002155105015845038, "clip_ratio/high_mean": 0.0008182993442460429, "clip_ratio/low_mean": 0.0009600231023796368, "clip_ratio/low_min": 0.00011936077498830855, "clip_ratio/region_mean": 0.0017783224466256797, "epoch": 0.03798657170023741, "grad_norm": 0.11137264221906662, "learning_rate": 1e-06, "loss": 0.0752, "step": 407 }, { "clip_ratio/high_max": 0.0023896176207927056, "clip_ratio/high_mean": 0.0010247544669255149, "clip_ratio/low_mean": 0.0011780979148170445, "clip_ratio/low_min": 0.00013312280771060614, "clip_ratio/region_mean": 0.00220285241084639, "epoch": 0.038079904800238, "grad_norm": 0.12629003822803497, "learning_rate": 1e-06, "loss": 0.0307, "step": 408 }, { "clip_ratio/high_max": 0.0023310889191634487, "clip_ratio/high_mean": 0.0010472496487636818, "clip_ratio/low_mean": 0.0010237017859253683, "clip_ratio/low_min": 0.00010384480447100941, "clip_ratio/region_mean": 0.002070951486530248, "epoch": 0.038173237900238584, "grad_norm": 0.12324889749288559, "learning_rate": 1e-06, "loss": -0.0035, "step": 409 }, { "clip_ratio/high_max": 0.0021046855981694534, "clip_ratio/high_mean": 0.0009924437017616583, "clip_ratio/low_mean": 0.0011546058158273809, "clip_ratio/low_min": 4.921210347674787e-05, "clip_ratio/region_mean": 0.002147049570339732, "epoch": 0.03826657100023917, "grad_norm": 0.1212676614522934, "learning_rate": 1e-06, "loss": 0.0439, "step": 410 }, { "clip_ratio/high_max": 0.0025673795462353155, "clip_ratio/high_mean": 0.0011265973189438228, "clip_ratio/low_mean": 0.0011353701302141417, "clip_ratio/low_min": 0.00013318741730472539, "clip_ratio/region_mean": 0.0022619674491579644, "epoch": 0.03835990410023975, "grad_norm": 0.11497417092323303, "learning_rate": 1e-06, "loss": 0.0238, "step": 411 }, { "clip_ratio/high_max": 0.0024536750788684003, "clip_ratio/high_mean": 0.0009998320310842246, "clip_ratio/low_mean": 0.0010761069715954363, "clip_ratio/low_min": 0.00011594409988902044, "clip_ratio/region_mean": 0.0020759389517479576, "epoch": 0.03845323720024033, "grad_norm": 0.11113645881414413, "learning_rate": 1e-06, "loss": -0.0057, "step": 412 }, { "clip_ratio/high_max": 0.002352974857785739, "clip_ratio/high_mean": 0.00101596657805203, "clip_ratio/low_mean": 0.001127457398979459, "clip_ratio/low_min": 9.757455154613126e-05, "clip_ratio/region_mean": 0.002143423967936542, "epoch": 0.03854657030024092, "grad_norm": 0.10741639137268066, "learning_rate": 1e-06, "loss": 0.0181, "step": 413 }, { "clip_ratio/high_max": 0.0025393421019543894, "clip_ratio/high_mean": 0.001066478731445386, "clip_ratio/low_mean": 0.0011650801043288084, "clip_ratio/low_min": 0.00013418069829640444, "clip_ratio/region_mean": 0.002231558828498237, "epoch": 0.038639903400241496, "grad_norm": 0.10366897284984589, "learning_rate": 1e-06, "loss": 0.0226, "step": 414 }, { "clip_ratio/high_max": 0.0022344187964336015, "clip_ratio/high_mean": 0.0010451171328895725, "clip_ratio/low_mean": 0.0010399218008387834, "clip_ratio/low_min": 0.0001321624231422902, "clip_ratio/region_mean": 0.0020850389410043135, "epoch": 0.03873323650024208, "grad_norm": 0.10847318172454834, "learning_rate": 1e-06, "loss": 0.0023, "step": 415 }, { "clip_ratio/high_max": 0.0025231009421986528, "clip_ratio/high_mean": 0.0010436738848511595, "clip_ratio/low_mean": 0.0012916867381136399, "clip_ratio/low_min": 0.00019523526589182438, "clip_ratio/region_mean": 0.002335360593860969, "epoch": 0.038826569600242666, "grad_norm": 0.11652623862028122, "learning_rate": 1e-06, "loss": -0.0004, "step": 416 }, { "clip_ratio/high_max": 0.0022899263276485726, "clip_ratio/high_mean": 0.0009768082582013449, "clip_ratio/low_mean": 0.001154935500380816, "clip_ratio/low_min": 0.00017526986448501702, "clip_ratio/region_mean": 0.0021317437203833833, "epoch": 0.03891990270024325, "grad_norm": 0.10895755887031555, "learning_rate": 1e-06, "loss": 0.0089, "step": 417 }, { "clip_ratio/high_max": 0.0021623237153107766, "clip_ratio/high_mean": 0.0009271793951484142, "clip_ratio/low_mean": 0.001203729523695074, "clip_ratio/low_min": 0.00014051249672775157, "clip_ratio/region_mean": 0.0021309089424903505, "epoch": 0.03901323580024383, "grad_norm": 0.11828435212373734, "learning_rate": 1e-06, "loss": 0.0487, "step": 418 }, { "clip_ratio/high_max": 0.0020629802565963473, "clip_ratio/high_mean": 0.0009097443526115967, "clip_ratio/low_mean": 0.0011997543842880987, "clip_ratio/low_min": 0.00012134482312831096, "clip_ratio/region_mean": 0.0021094987823744304, "epoch": 0.039106568900244415, "grad_norm": 0.1027655303478241, "learning_rate": 1e-06, "loss": 0.0329, "step": 419 }, { "clip_ratio/high_max": 0.002252902719192207, "clip_ratio/high_mean": 0.0009738856806507101, "clip_ratio/low_mean": 0.0013321080987225287, "clip_ratio/low_min": 7.121721318981145e-05, "clip_ratio/region_mean": 0.0023059937229845673, "epoch": 0.039199902000245, "grad_norm": 0.17782095074653625, "learning_rate": 1e-06, "loss": 0.0268, "step": 420 }, { "clip_ratio/high_max": 0.002589685194834601, "clip_ratio/high_mean": 0.0010109698450833093, "clip_ratio/low_mean": 0.0013040199009992648, "clip_ratio/low_min": 0.00014795976494497154, "clip_ratio/region_mean": 0.002314989746082574, "epoch": 0.039293235100245585, "grad_norm": 0.11937125027179718, "learning_rate": 1e-06, "loss": 0.0198, "step": 421 }, { "clip_ratio/high_max": 0.0022994001556071453, "clip_ratio/high_mean": 0.000989869935438037, "clip_ratio/low_mean": 0.0011434461157477926, "clip_ratio/low_min": 6.799359834985808e-05, "clip_ratio/region_mean": 0.0021333160184440203, "epoch": 0.039386568200246164, "grad_norm": 0.12590163946151733, "learning_rate": 1e-06, "loss": 0.0252, "step": 422 }, { "clip_ratio/high_max": 0.0020929700403939933, "clip_ratio/high_mean": 0.0009600345874787308, "clip_ratio/low_mean": 0.001167414080555318, "clip_ratio/low_min": 0.00016555865931877634, "clip_ratio/region_mean": 0.002127448591636494, "epoch": 0.03947990130024675, "grad_norm": 0.11396259814500809, "learning_rate": 1e-06, "loss": 0.0471, "step": 423 }, { "clip_ratio/high_max": 0.0025033290512510575, "clip_ratio/high_mean": 0.001124679885833757, "clip_ratio/low_mean": 0.0011718738132913131, "clip_ratio/low_min": 0.00011433269628469134, "clip_ratio/region_mean": 0.00229655369912507, "epoch": 0.039573234400247334, "grad_norm": 0.11035384982824326, "learning_rate": 1e-06, "loss": 0.0391, "step": 424 }, { "clip_ratio/high_max": 0.002575597885879688, "clip_ratio/high_mean": 0.0010167077452933881, "clip_ratio/low_mean": 0.001029267248668475, "clip_ratio/low_min": 6.282557842496317e-05, "clip_ratio/region_mean": 0.002045974979409948, "epoch": 0.03966656750024792, "grad_norm": 0.11969095468521118, "learning_rate": 1e-06, "loss": 0.036, "step": 425 }, { "clip_ratio/high_max": 0.0028100311537855305, "clip_ratio/high_mean": 0.0012184547013021074, "clip_ratio/low_mean": 0.0012659263993555214, "clip_ratio/low_min": 0.00010975159420922864, "clip_ratio/region_mean": 0.002484381118847523, "epoch": 0.0397599006002485, "grad_norm": 0.12384101748466492, "learning_rate": 1e-06, "loss": 0.0016, "step": 426 }, { "clip_ratio/high_max": 0.0027885406598215923, "clip_ratio/high_mean": 0.001222028222400695, "clip_ratio/low_mean": 0.0012854080523538869, "clip_ratio/low_min": 0.00016744929416745435, "clip_ratio/region_mean": 0.0025074362638406456, "epoch": 0.03985323370024908, "grad_norm": 0.1150302067399025, "learning_rate": 1e-06, "loss": 0.0071, "step": 427 }, { "clip_ratio/high_max": 0.0021578748419415206, "clip_ratio/high_mean": 0.0009818261132750195, "clip_ratio/low_mean": 0.001476924298913218, "clip_ratio/low_min": 0.00017322531493846327, "clip_ratio/region_mean": 0.0024587504303781316, "epoch": 0.03994656680024967, "grad_norm": 0.12892085313796997, "learning_rate": 1e-06, "loss": 0.0623, "step": 428 }, { "clip_ratio/high_max": 0.0024276287913380656, "clip_ratio/high_mean": 0.0010696872996049933, "clip_ratio/low_mean": 0.0011784744920078083, "clip_ratio/low_min": 0.0001868274212029064, "clip_ratio/region_mean": 0.002248161836178042, "epoch": 0.040039899900250246, "grad_norm": 0.11940957605838776, "learning_rate": 1e-06, "loss": 0.0254, "step": 429 }, { "clip_ratio/high_max": 0.002712379500735551, "clip_ratio/high_mean": 0.0011103753167844843, "clip_ratio/low_mean": 0.001316132471401943, "clip_ratio/low_min": 0.00014854518576612463, "clip_ratio/region_mean": 0.002426507846394088, "epoch": 0.04013323300025083, "grad_norm": 0.12025794386863708, "learning_rate": 1e-06, "loss": 0.0167, "step": 430 }, { "clip_ratio/high_max": 0.0021951302915113047, "clip_ratio/high_mean": 0.0009840695493039675, "clip_ratio/low_mean": 0.001477239806263242, "clip_ratio/low_min": 0.0002702295041672187, "clip_ratio/region_mean": 0.002461309341015294, "epoch": 0.04022656610025142, "grad_norm": 0.12111403793096542, "learning_rate": 1e-06, "loss": 0.0418, "step": 431 }, { "clip_ratio/high_max": 0.002914804055762943, "clip_ratio/high_mean": 0.0011710129620041698, "clip_ratio/low_mean": 0.0011693419237417402, "clip_ratio/low_min": 9.946343197952956e-05, "clip_ratio/region_mean": 0.002340354876650963, "epoch": 0.040319899200252, "grad_norm": 0.11317253857851028, "learning_rate": 1e-06, "loss": 0.0413, "step": 432 }, { "clip_ratio/high_max": 0.002791720755340066, "clip_ratio/high_mean": 0.001191430053950171, "clip_ratio/low_mean": 0.001183473963465076, "clip_ratio/low_min": 7.877407733758446e-05, "clip_ratio/region_mean": 0.0023749039246467873, "epoch": 0.04041323230025258, "grad_norm": 0.1236572116613388, "learning_rate": 1e-06, "loss": 0.0306, "step": 433 }, { "clip_ratio/high_max": 0.0026482223329367116, "clip_ratio/high_mean": 0.0011231598327867687, "clip_ratio/low_mean": 0.0010935265490843449, "clip_ratio/low_min": 9.633883337301086e-05, "clip_ratio/region_mean": 0.0022166864218888804, "epoch": 0.040506565400253165, "grad_norm": 0.11718609929084778, "learning_rate": 1e-06, "loss": -0.0033, "step": 434 }, { "clip_ratio/high_max": 0.002645078388013644, "clip_ratio/high_mean": 0.0011033074151782785, "clip_ratio/low_mean": 0.0012399249826557934, "clip_ratio/low_min": 0.0001457012076571118, "clip_ratio/region_mean": 0.0023432324451277964, "epoch": 0.04059989850025375, "grad_norm": 0.10465870797634125, "learning_rate": 1e-06, "loss": 0.0092, "step": 435 }, { "clip_ratio/high_max": 0.0031988687624107115, "clip_ratio/high_mean": 0.0013430521830741782, "clip_ratio/low_mean": 0.0010818801056302618, "clip_ratio/low_min": 6.306897284957813e-05, "clip_ratio/region_mean": 0.002424932266876567, "epoch": 0.040693231600254336, "grad_norm": 0.11486940830945969, "learning_rate": 1e-06, "loss": -0.0005, "step": 436 }, { "clip_ratio/high_max": 0.0026578874239930883, "clip_ratio/high_mean": 0.0010474859482201282, "clip_ratio/low_mean": 0.0014028800214873627, "clip_ratio/low_min": 9.14539323275676e-05, "clip_ratio/region_mean": 0.0024503660315531306, "epoch": 0.040786564700254914, "grad_norm": 0.11749535799026489, "learning_rate": 1e-06, "loss": 0.0493, "step": 437 }, { "clip_ratio/high_max": 0.002215251537563745, "clip_ratio/high_mean": 0.00098038495889341, "clip_ratio/low_mean": 0.001221110018377658, "clip_ratio/low_min": 0.00015379883734567557, "clip_ratio/region_mean": 0.0022014950081938878, "epoch": 0.0408798978002555, "grad_norm": 0.11227718740701675, "learning_rate": 1e-06, "loss": 0.0411, "step": 438 }, { "clip_ratio/high_max": 0.002576027167378925, "clip_ratio/high_mean": 0.001060488611983601, "clip_ratio/low_mean": 0.0012537051807157695, "clip_ratio/low_min": 6.072116866562283e-05, "clip_ratio/region_mean": 0.0023141937563195825, "epoch": 0.040973230900256084, "grad_norm": 0.10874490439891815, "learning_rate": 1e-06, "loss": 0.0374, "step": 439 }, { "clip_ratio/high_max": 0.0026765053044073284, "clip_ratio/high_mean": 0.0011153004743391648, "clip_ratio/low_mean": 0.0012127916088502388, "clip_ratio/low_min": 0.00022910187544766814, "clip_ratio/region_mean": 0.002328092115931213, "epoch": 0.04106656400025666, "grad_norm": 0.12040571868419647, "learning_rate": 1e-06, "loss": 0.0214, "step": 440 }, { "clip_ratio/high_max": 0.0024024201338761486, "clip_ratio/high_mean": 0.0011041076795663685, "clip_ratio/low_mean": 0.0014185359432303812, "clip_ratio/low_min": 0.0002264940703753382, "clip_ratio/region_mean": 0.0025226436409866437, "epoch": 0.04115989710025725, "grad_norm": 0.12071618437767029, "learning_rate": 1e-06, "loss": 0.0663, "step": 441 }, { "clip_ratio/high_max": 0.0032360234326915815, "clip_ratio/high_mean": 0.0013241096385172568, "clip_ratio/low_mean": 0.0010865663571166806, "clip_ratio/low_min": 5.424283881438896e-05, "clip_ratio/region_mean": 0.0024106759155984037, "epoch": 0.04125323020025783, "grad_norm": 0.1231648400425911, "learning_rate": 1e-06, "loss": -0.0089, "step": 442 }, { "clip_ratio/high_max": 0.002679995115613565, "clip_ratio/high_mean": 0.001158734665295924, "clip_ratio/low_mean": 0.0014259270774346078, "clip_ratio/low_min": 0.0002741580392466858, "clip_ratio/region_mean": 0.0025846617863862775, "epoch": 0.04134656330025842, "grad_norm": 0.1178274005651474, "learning_rate": 1e-06, "loss": 0.0266, "step": 443 }, { "clip_ratio/high_max": 0.0025484498910373077, "clip_ratio/high_mean": 0.0010472833000676474, "clip_ratio/low_mean": 0.0013999972943565808, "clip_ratio/low_min": 6.0549829868250526e-05, "clip_ratio/region_mean": 0.002447280625347048, "epoch": 0.041439896400258996, "grad_norm": 0.12369096279144287, "learning_rate": 1e-06, "loss": 0.0736, "step": 444 }, { "clip_ratio/high_max": 0.0026862284175876994, "clip_ratio/high_mean": 0.0011537462596606929, "clip_ratio/low_mean": 0.0011293766892777057, "clip_ratio/low_min": 0.00019748830254684435, "clip_ratio/region_mean": 0.0022831229434814304, "epoch": 0.04153322950025958, "grad_norm": 0.10899684578180313, "learning_rate": 1e-06, "loss": 0.0253, "step": 445 }, { "clip_ratio/high_max": 0.0029682034291909076, "clip_ratio/high_mean": 0.001209483458296745, "clip_ratio/low_mean": 0.0011931194967473857, "clip_ratio/low_min": 0.0002559849999670405, "clip_ratio/region_mean": 0.0024026029277592897, "epoch": 0.04162656260026017, "grad_norm": 0.1255938857793808, "learning_rate": 1e-06, "loss": 0.0461, "step": 446 }, { "clip_ratio/high_max": 0.003150494012515992, "clip_ratio/high_mean": 0.001252271787961945, "clip_ratio/low_mean": 0.0011317522330500651, "clip_ratio/low_min": 9.059268904820783e-05, "clip_ratio/region_mean": 0.0023840240100980736, "epoch": 0.04171989570026075, "grad_norm": 0.1184663474559784, "learning_rate": 1e-06, "loss": 0.0214, "step": 447 }, { "clip_ratio/high_max": 0.002483110685716383, "clip_ratio/high_mean": 0.001090563844627468, "clip_ratio/low_mean": 0.0013386291830101982, "clip_ratio/low_min": 0.00026584475199342705, "clip_ratio/region_mean": 0.0024291930385516025, "epoch": 0.04181322880026133, "grad_norm": 0.12208592146635056, "learning_rate": 1e-06, "loss": 0.0493, "step": 448 }, { "clip_ratio/high_max": 0.0027344242625986226, "clip_ratio/high_mean": 0.0011705778779287357, "clip_ratio/low_mean": 0.0013157717730791774, "clip_ratio/low_min": 0.0002751404063019436, "clip_ratio/region_mean": 0.0024863496655598283, "epoch": 0.041906561900261916, "grad_norm": 0.1273927390575409, "learning_rate": 1e-06, "loss": 0.0414, "step": 449 }, { "clip_ratio/high_max": 0.0028503137145889923, "clip_ratio/high_mean": 0.0010730548146966612, "clip_ratio/low_mean": 0.0013499146625690628, "clip_ratio/low_min": 0.00015746967983432114, "clip_ratio/region_mean": 0.0024229694245150313, "epoch": 0.0419998950002625, "grad_norm": 0.12964798510074615, "learning_rate": 1e-06, "loss": 0.0579, "step": 450 }, { "clip_ratio/high_max": 0.002570183205534704, "clip_ratio/high_mean": 0.0010310067773389164, "clip_ratio/low_mean": 0.0013300511127454229, "clip_ratio/low_min": 0.00015957068808347685, "clip_ratio/region_mean": 0.0023610578864463605, "epoch": 0.042093228100263086, "grad_norm": 0.11883798986673355, "learning_rate": 1e-06, "loss": 0.0696, "step": 451 }, { "clip_ratio/high_max": 0.0028321592617430724, "clip_ratio/high_mean": 0.001186878052976681, "clip_ratio/low_mean": 0.001336474819254363, "clip_ratio/low_min": 0.00016158549351530382, "clip_ratio/region_mean": 0.002523352872231044, "epoch": 0.042186561200263664, "grad_norm": 0.12103655934333801, "learning_rate": 1e-06, "loss": 0.0339, "step": 452 }, { "clip_ratio/high_max": 0.002958241391752381, "clip_ratio/high_mean": 0.0012343205307843164, "clip_ratio/low_mean": 0.0010745342697191518, "clip_ratio/low_min": 8.26233699626755e-05, "clip_ratio/region_mean": 0.002308854767761659, "epoch": 0.04227989430026425, "grad_norm": 0.10631893575191498, "learning_rate": 1e-06, "loss": 0.022, "step": 453 }, { "clip_ratio/high_max": 0.002495729553629644, "clip_ratio/high_mean": 0.0010520065770833753, "clip_ratio/low_mean": 0.001160851421445841, "clip_ratio/low_min": 0.00011522226213855902, "clip_ratio/region_mean": 0.0022128580530988984, "epoch": 0.042373227400264835, "grad_norm": 0.1074850857257843, "learning_rate": 1e-06, "loss": 0.0212, "step": 454 }, { "clip_ratio/high_max": 0.0023758730676490813, "clip_ratio/high_mean": 0.0010872426682908554, "clip_ratio/low_mean": 0.0012397541395330336, "clip_ratio/low_min": 0.0001317692640441237, "clip_ratio/region_mean": 0.002326996785996016, "epoch": 0.04246656050026541, "grad_norm": 0.11900975555181503, "learning_rate": 1e-06, "loss": 0.0555, "step": 455 }, { "clip_ratio/high_max": 0.002852059551514685, "clip_ratio/high_mean": 0.0013110979489283636, "clip_ratio/low_mean": 0.001148153722169809, "clip_ratio/low_min": 8.584175884607248e-05, "clip_ratio/region_mean": 0.002459251663822215, "epoch": 0.042559893600266, "grad_norm": 0.12325873970985413, "learning_rate": 1e-06, "loss": 0.0415, "step": 456 }, { "clip_ratio/high_max": 0.0028292798888287507, "clip_ratio/high_mean": 0.0013528350209526252, "clip_ratio/low_mean": 0.0012471344380173832, "clip_ratio/low_min": 0.0001571071770740673, "clip_ratio/region_mean": 0.0025999694626079872, "epoch": 0.04265322670026658, "grad_norm": 0.12609592080116272, "learning_rate": 1e-06, "loss": 0.049, "step": 457 }, { "clip_ratio/high_max": 0.002836095947714057, "clip_ratio/high_mean": 0.001192644518596353, "clip_ratio/low_mean": 0.001219635803863639, "clip_ratio/low_min": 0.00014255207497626543, "clip_ratio/region_mean": 0.002412280264252331, "epoch": 0.04274655980026717, "grad_norm": 0.11276479810476303, "learning_rate": 1e-06, "loss": 0.0211, "step": 458 }, { "clip_ratio/high_max": 0.002672570895811077, "clip_ratio/high_mean": 0.0013578443249571137, "clip_ratio/low_mean": 0.0011806834845629055, "clip_ratio/low_min": 0.00014179004028846975, "clip_ratio/region_mean": 0.002538527754950337, "epoch": 0.04283989290026775, "grad_norm": 0.11869664490222931, "learning_rate": 1e-06, "loss": -0.0114, "step": 459 }, { "clip_ratio/high_max": 0.002805124473525211, "clip_ratio/high_mean": 0.001244487488293089, "clip_ratio/low_mean": 0.0012005355893052183, "clip_ratio/low_min": 0.00010051603203464765, "clip_ratio/region_mean": 0.0024450231285300106, "epoch": 0.04293322600026833, "grad_norm": 0.11884006857872009, "learning_rate": 1e-06, "loss": 0.0041, "step": 460 }, { "clip_ratio/high_max": 0.0032767006050562486, "clip_ratio/high_mean": 0.0013355009250517469, "clip_ratio/low_mean": 0.001249744036613265, "clip_ratio/low_min": 9.217625211022096e-05, "clip_ratio/region_mean": 0.002585245019872673, "epoch": 0.04302655910026892, "grad_norm": 0.1172538697719574, "learning_rate": 1e-06, "loss": -0.0025, "step": 461 }, { "clip_ratio/high_max": 0.002603926412120927, "clip_ratio/high_mean": 0.0011304153340461198, "clip_ratio/low_mean": 0.0012496151612140238, "clip_ratio/low_min": 0.00015021450053609442, "clip_ratio/region_mean": 0.0023800305207259953, "epoch": 0.0431198922002695, "grad_norm": 0.11923906952142715, "learning_rate": 1e-06, "loss": 0.0108, "step": 462 }, { "clip_ratio/high_max": 0.0026303007907699794, "clip_ratio/high_mean": 0.0011468100146885263, "clip_ratio/low_mean": 0.0013169388294045348, "clip_ratio/low_min": 0.0002506978089513723, "clip_ratio/region_mean": 0.002463748838636093, "epoch": 0.04321322530027008, "grad_norm": 0.12104181200265884, "learning_rate": 1e-06, "loss": 0.0573, "step": 463 }, { "clip_ratio/high_max": 0.002296301092428621, "clip_ratio/high_mean": 0.0010385345449321903, "clip_ratio/low_mean": 0.0012297647608647821, "clip_ratio/low_min": 0.00018888493832491804, "clip_ratio/region_mean": 0.0022682993512717076, "epoch": 0.043306558400270666, "grad_norm": 0.10575677454471588, "learning_rate": 1e-06, "loss": 0.0516, "step": 464 }, { "clip_ratio/high_max": 0.0030926807958167046, "clip_ratio/high_mean": 0.001284816815314116, "clip_ratio/low_mean": 0.001207638833875535, "clip_ratio/low_min": 4.7732088205520995e-05, "clip_ratio/region_mean": 0.002492455685569439, "epoch": 0.04339989150027125, "grad_norm": 0.11239206045866013, "learning_rate": 1e-06, "loss": 0.0179, "step": 465 }, { "clip_ratio/high_max": 0.002655661228345707, "clip_ratio/high_mean": 0.001172369175037602, "clip_ratio/low_mean": 0.0011677321235765703, "clip_ratio/low_min": 2.9391017960733734e-05, "clip_ratio/region_mean": 0.0023401012949761935, "epoch": 0.04349322460027183, "grad_norm": 0.12310929596424103, "learning_rate": 1e-06, "loss": 0.01, "step": 466 }, { "clip_ratio/high_max": 0.0029448211134877056, "clip_ratio/high_mean": 0.0013379647753026802, "clip_ratio/low_mean": 0.0014643511385656893, "clip_ratio/low_min": 0.00027341156874172157, "clip_ratio/region_mean": 0.0028023159684380516, "epoch": 0.043586557700272414, "grad_norm": 0.11911243200302124, "learning_rate": 1e-06, "loss": 0.0192, "step": 467 }, { "clip_ratio/high_max": 0.002272747464303393, "clip_ratio/high_mean": 0.0010893298967857845, "clip_ratio/low_mean": 0.001343431376881199, "clip_ratio/low_min": 0.0002126211711583892, "clip_ratio/region_mean": 0.0024327613136847503, "epoch": 0.043679890800273, "grad_norm": 0.107632115483284, "learning_rate": 1e-06, "loss": 0.0168, "step": 468 }, { "clip_ratio/high_max": 0.0028668950690189376, "clip_ratio/high_mean": 0.0013185363059164956, "clip_ratio/low_mean": 0.001355787411739584, "clip_ratio/low_min": 0.0002422854468022706, "clip_ratio/region_mean": 0.002674323732207995, "epoch": 0.043773223900273585, "grad_norm": 0.12274787575006485, "learning_rate": 1e-06, "loss": 0.0124, "step": 469 }, { "clip_ratio/high_max": 0.0028060728145646863, "clip_ratio/high_mean": 0.0012101395332138054, "clip_ratio/low_mean": 0.0013300097270985134, "clip_ratio/low_min": 0.00014283526434155647, "clip_ratio/region_mean": 0.0025401492894161493, "epoch": 0.04386655700027416, "grad_norm": 0.11154117435216904, "learning_rate": 1e-06, "loss": 0.0448, "step": 470 }, { "clip_ratio/high_max": 0.0027159384699189104, "clip_ratio/high_mean": 0.0012539946110337041, "clip_ratio/low_mean": 0.0011513183235365432, "clip_ratio/low_min": 9.54349397943588e-05, "clip_ratio/region_mean": 0.0024053129163803533, "epoch": 0.04395989010027475, "grad_norm": 0.11300830543041229, "learning_rate": 1e-06, "loss": 0.0026, "step": 471 }, { "clip_ratio/high_max": 0.002835048217093572, "clip_ratio/high_mean": 0.0011275003635091707, "clip_ratio/low_mean": 0.0011441112801549025, "clip_ratio/low_min": 0.00011486600669741165, "clip_ratio/region_mean": 0.002271611650940031, "epoch": 0.044053223200275334, "grad_norm": 0.1108628660440445, "learning_rate": 1e-06, "loss": -0.0016, "step": 472 }, { "clip_ratio/high_max": 0.0023499372400692664, "clip_ratio/high_mean": 0.001188060636195587, "clip_ratio/low_mean": 0.0013718581467401236, "clip_ratio/low_min": 0.00015301451276172884, "clip_ratio/region_mean": 0.0025599187865736894, "epoch": 0.04414655630027592, "grad_norm": 0.1288527101278305, "learning_rate": 1e-06, "loss": 0.0093, "step": 473 }, { "clip_ratio/high_max": 0.002598185070382897, "clip_ratio/high_mean": 0.0011076146201958181, "clip_ratio/low_mean": 0.001352495892206207, "clip_ratio/low_min": 0.00014286307487054728, "clip_ratio/region_mean": 0.002460110503307078, "epoch": 0.0442398894002765, "grad_norm": 0.11513441056013107, "learning_rate": 1e-06, "loss": 0.0276, "step": 474 }, { "clip_ratio/high_max": 0.0031010133025120012, "clip_ratio/high_mean": 0.0013350563858693931, "clip_ratio/low_mean": 0.0012915327497466933, "clip_ratio/low_min": 0.00011129882477689534, "clip_ratio/region_mean": 0.002626589142892044, "epoch": 0.04433322250027708, "grad_norm": 0.11405772715806961, "learning_rate": 1e-06, "loss": 0.0131, "step": 475 }, { "clip_ratio/high_max": 0.002686013547645416, "clip_ratio/high_mean": 0.001177495127194561, "clip_ratio/low_mean": 0.0014041948670637794, "clip_ratio/low_min": 0.00011557006655493751, "clip_ratio/region_mean": 0.002581689986982383, "epoch": 0.04442655560027767, "grad_norm": 0.11358251422643661, "learning_rate": 1e-06, "loss": 0.028, "step": 476 }, { "clip_ratio/high_max": 0.0026814865123014897, "clip_ratio/high_mean": 0.0012361757762846537, "clip_ratio/low_mean": 0.0012965224614163162, "clip_ratio/low_min": 0.00013232356832304504, "clip_ratio/region_mean": 0.0025326982213300653, "epoch": 0.04451988870027825, "grad_norm": 0.11191324889659882, "learning_rate": 1e-06, "loss": -0.0258, "step": 477 }, { "clip_ratio/high_max": 0.0023534242245659698, "clip_ratio/high_mean": 0.0010918897951341933, "clip_ratio/low_mean": 0.0013543551649490837, "clip_ratio/low_min": 0.000266472206931212, "clip_ratio/region_mean": 0.002446244929160457, "epoch": 0.04461322180027883, "grad_norm": 0.11773058772087097, "learning_rate": 1e-06, "loss": 0.0327, "step": 478 }, { "clip_ratio/high_max": 0.0031445046988665126, "clip_ratio/high_mean": 0.001196626322780503, "clip_ratio/low_mean": 0.0013966872211312875, "clip_ratio/low_min": 3.272273443144513e-05, "clip_ratio/region_mean": 0.002593313518445939, "epoch": 0.044706554900279416, "grad_norm": 0.10964041948318481, "learning_rate": 1e-06, "loss": 0.0529, "step": 479 }, { "clip_ratio/high_max": 0.0025364956745761447, "clip_ratio/high_mean": 0.0011413568099669646, "clip_ratio/low_mean": 0.00119198762877204, "clip_ratio/low_min": 0.00012087325376342051, "clip_ratio/region_mean": 0.0023333444114541635, "epoch": 0.04479988800028, "grad_norm": 0.10653676837682724, "learning_rate": 1e-06, "loss": 0.0191, "step": 480 }, { "clip_ratio/high_max": 0.002942748680652585, "clip_ratio/high_mean": 0.0011751176934922114, "clip_ratio/low_mean": 0.001280921722354833, "clip_ratio/low_min": 0.00013952586596133187, "clip_ratio/region_mean": 0.0024560394522268325, "epoch": 0.04489322110028058, "grad_norm": 0.11577119678258896, "learning_rate": 1e-06, "loss": 0.0199, "step": 481 }, { "clip_ratio/high_max": 0.0029228743296698667, "clip_ratio/high_mean": 0.0013020505066378973, "clip_ratio/low_mean": 0.0012381026572256815, "clip_ratio/low_min": 0.00011427838308009086, "clip_ratio/region_mean": 0.0025401531893294305, "epoch": 0.044986554200281165, "grad_norm": 0.11827868968248367, "learning_rate": 1e-06, "loss": 0.0241, "step": 482 }, { "clip_ratio/high_max": 0.002798367386276368, "clip_ratio/high_mean": 0.0012768863125529606, "clip_ratio/low_mean": 0.0013054129412921611, "clip_ratio/low_min": 0.0001997903764276998, "clip_ratio/region_mean": 0.002582299232017249, "epoch": 0.04507988730028175, "grad_norm": 0.11663104593753815, "learning_rate": 1e-06, "loss": 0.0609, "step": 483 }, { "clip_ratio/high_max": 0.0031021843024063855, "clip_ratio/high_mean": 0.0011812180855486076, "clip_ratio/low_mean": 0.00136826848029159, "clip_ratio/low_min": 0.00015387914936582092, "clip_ratio/region_mean": 0.0025494865840300918, "epoch": 0.045173220400282335, "grad_norm": 0.12728598713874817, "learning_rate": 1e-06, "loss": 0.0032, "step": 484 }, { "clip_ratio/high_max": 0.0026322743942728266, "clip_ratio/high_mean": 0.0012486109917517751, "clip_ratio/low_mean": 0.0014192188828019425, "clip_ratio/low_min": 0.00014572951022273628, "clip_ratio/region_mean": 0.002667829830897972, "epoch": 0.04526655350028291, "grad_norm": 0.11983053386211395, "learning_rate": 1e-06, "loss": 0.0689, "step": 485 }, { "clip_ratio/high_max": 0.0026598095428198576, "clip_ratio/high_mean": 0.001260376116988482, "clip_ratio/low_mean": 0.0012419340982887661, "clip_ratio/low_min": 0.0001565280108479783, "clip_ratio/region_mean": 0.002502310228010174, "epoch": 0.0453598866002835, "grad_norm": 0.11784526705741882, "learning_rate": 1e-06, "loss": 0.0216, "step": 486 }, { "clip_ratio/high_max": 0.003105874486209359, "clip_ratio/high_mean": 0.0012500214343162952, "clip_ratio/low_mean": 0.0012417898742569378, "clip_ratio/low_min": 0.0002304073650520877, "clip_ratio/region_mean": 0.002491811290383339, "epoch": 0.045453219700284084, "grad_norm": 0.09820187836885452, "learning_rate": 1e-06, "loss": 0.0148, "step": 487 }, { "clip_ratio/high_max": 0.0026437555425218306, "clip_ratio/high_mean": 0.001253314498171676, "clip_ratio/low_mean": 0.0012455648575269151, "clip_ratio/low_min": 0.00010951254898827756, "clip_ratio/region_mean": 0.0024988793302327394, "epoch": 0.04554655280028467, "grad_norm": 0.11951355636119843, "learning_rate": 1e-06, "loss": -0.0125, "step": 488 }, { "clip_ratio/high_max": 0.002886282483814284, "clip_ratio/high_mean": 0.0012024885836581234, "clip_ratio/low_mean": 0.0015162914132815786, "clip_ratio/low_min": 0.00014764429579372518, "clip_ratio/region_mean": 0.002718779993301723, "epoch": 0.04563988590028525, "grad_norm": 0.10836157947778702, "learning_rate": 1e-06, "loss": 0.0558, "step": 489 }, { "clip_ratio/high_max": 0.0027689805065165274, "clip_ratio/high_mean": 0.0011672068903862964, "clip_ratio/low_mean": 0.0014792614420002792, "clip_ratio/low_min": 0.0001762853216860094, "clip_ratio/region_mean": 0.002646468310558703, "epoch": 0.04573321900028583, "grad_norm": 0.1177634596824646, "learning_rate": 1e-06, "loss": 0.0344, "step": 490 }, { "clip_ratio/high_max": 0.002386733300227206, "clip_ratio/high_mean": 0.0011448130971984938, "clip_ratio/low_mean": 0.0013705583623959683, "clip_ratio/low_min": 7.598549564136192e-05, "clip_ratio/region_mean": 0.0025153713795589283, "epoch": 0.04582655210028642, "grad_norm": 0.1204015240073204, "learning_rate": 1e-06, "loss": -0.0049, "step": 491 }, { "clip_ratio/high_max": 0.002715635906497482, "clip_ratio/high_mean": 0.001149334741057828, "clip_ratio/low_mean": 0.0013135757508280221, "clip_ratio/low_min": 0.00010997715617122594, "clip_ratio/region_mean": 0.0024629104154882953, "epoch": 0.045919885200286996, "grad_norm": 0.11512818932533264, "learning_rate": 1e-06, "loss": 0.0142, "step": 492 }, { "clip_ratio/high_max": 0.002689068001927808, "clip_ratio/high_mean": 0.0012231886175868567, "clip_ratio/low_mean": 0.0014610709076805506, "clip_ratio/low_min": 0.0002445696973154554, "clip_ratio/region_mean": 0.002684259496163577, "epoch": 0.04601321830028758, "grad_norm": 0.11573384702205658, "learning_rate": 1e-06, "loss": 0.0572, "step": 493 }, { "clip_ratio/high_max": 0.0024459760170429945, "clip_ratio/high_mean": 0.0010156796051887795, "clip_ratio/low_mean": 0.001382606500555994, "clip_ratio/low_min": 0.00022269248074735515, "clip_ratio/region_mean": 0.0023982860293472186, "epoch": 0.046106551400288166, "grad_norm": 0.10767379403114319, "learning_rate": 1e-06, "loss": 0.0276, "step": 494 }, { "clip_ratio/high_max": 0.0024828405294101685, "clip_ratio/high_mean": 0.0010891584879573202, "clip_ratio/low_mean": 0.001420930308086099, "clip_ratio/low_min": 0.00016016650988603942, "clip_ratio/region_mean": 0.0025100888015003875, "epoch": 0.04619988450028875, "grad_norm": 0.10432136803865433, "learning_rate": 1e-06, "loss": -0.0055, "step": 495 }, { "clip_ratio/high_max": 0.0029428731504594907, "clip_ratio/high_mean": 0.001489866404881468, "clip_ratio/low_mean": 0.0011714839674823452, "clip_ratio/low_min": 6.932638461876195e-05, "clip_ratio/region_mean": 0.0026613504014676437, "epoch": 0.04629321760028933, "grad_norm": 0.12726308405399323, "learning_rate": 1e-06, "loss": -0.0296, "step": 496 }, { "clip_ratio/high_max": 0.0022470306648756377, "clip_ratio/high_mean": 0.0010053157275251579, "clip_ratio/low_mean": 0.001660815134528093, "clip_ratio/low_min": 0.00039368517445836915, "clip_ratio/region_mean": 0.0026661308656912297, "epoch": 0.046386550700289915, "grad_norm": 0.12325713038444519, "learning_rate": 1e-06, "loss": 0.089, "step": 497 }, { "clip_ratio/high_max": 0.00275458848773269, "clip_ratio/high_mean": 0.0011616728334047366, "clip_ratio/low_mean": 0.0012593953797477297, "clip_ratio/low_min": 0.00014403153181774542, "clip_ratio/region_mean": 0.0024210681585827842, "epoch": 0.0464798838002905, "grad_norm": 0.11283209919929504, "learning_rate": 1e-06, "loss": 0.0357, "step": 498 }, { "clip_ratio/high_max": 0.0027246444442425855, "clip_ratio/high_mean": 0.001272648663871223, "clip_ratio/low_mean": 0.0014084099711908493, "clip_ratio/low_min": 0.0001263133617612766, "clip_ratio/region_mean": 0.0026810586714418605, "epoch": 0.046573216900291085, "grad_norm": 0.1395314782857895, "learning_rate": 1e-06, "loss": 0.0022, "step": 499 }, { "clip_ratio/high_max": 0.002887615963118151, "clip_ratio/high_mean": 0.0012879715504823253, "clip_ratio/low_mean": 0.001091747566988488, "clip_ratio/low_min": 7.684786487516249e-05, "clip_ratio/region_mean": 0.0023797191388439387, "epoch": 0.046666550000291664, "grad_norm": 0.10928663611412048, "learning_rate": 1e-06, "loss": -0.0092, "step": 500 }, { "clip_ratio/high_max": 0.003059794929868076, "clip_ratio/high_mean": 0.0013452258026518393, "clip_ratio/low_mean": 0.0012817439892387483, "clip_ratio/low_min": 0.00017086114530684426, "clip_ratio/region_mean": 0.0026269697918905877, "epoch": 0.04675988310029225, "grad_norm": 0.11967866867780685, "learning_rate": 1e-06, "loss": 0.0136, "step": 501 }, { "clip_ratio/high_max": 0.003019440388015937, "clip_ratio/high_mean": 0.0013740912872890476, "clip_ratio/low_mean": 0.001185422788694268, "clip_ratio/low_min": 0.000181762774445815, "clip_ratio/region_mean": 0.0025595141123631038, "epoch": 0.046853216200292834, "grad_norm": 0.11514464765787125, "learning_rate": 1e-06, "loss": -0.0493, "step": 502 }, { "clip_ratio/high_max": 0.0028277754827286117, "clip_ratio/high_mean": 0.001221134061779594, "clip_ratio/low_mean": 0.0012805835831386503, "clip_ratio/low_min": 0.00010841424591490068, "clip_ratio/region_mean": 0.0025017176594701596, "epoch": 0.04694654930029342, "grad_norm": 0.11402399837970734, "learning_rate": 1e-06, "loss": -0.0117, "step": 503 }, { "clip_ratio/high_max": 0.002857549501641188, "clip_ratio/high_mean": 0.0012763088379870169, "clip_ratio/low_mean": 0.0014413449353014585, "clip_ratio/low_min": 0.00015726114997960394, "clip_ratio/region_mean": 0.0027176537550985813, "epoch": 0.047039882400294, "grad_norm": 0.1121758371591568, "learning_rate": 1e-06, "loss": 0.0039, "step": 504 }, { "clip_ratio/high_max": 0.002667604901944287, "clip_ratio/high_mean": 0.0011350900822435506, "clip_ratio/low_mean": 0.0012854438318754546, "clip_ratio/low_min": 0.00011270669983787229, "clip_ratio/region_mean": 0.002420533914119005, "epoch": 0.04713321550029458, "grad_norm": 0.12903131544589996, "learning_rate": 1e-06, "loss": 0.0411, "step": 505 }, { "clip_ratio/high_max": 0.002416642746538855, "clip_ratio/high_mean": 0.0009763216694409493, "clip_ratio/low_mean": 0.0014641867419413757, "clip_ratio/low_min": 0.0002386602445767494, "clip_ratio/region_mean": 0.0024405084041063674, "epoch": 0.04722654860029517, "grad_norm": 0.10790254175662994, "learning_rate": 1e-06, "loss": 0.0784, "step": 506 }, { "clip_ratio/high_max": 0.002613978707813658, "clip_ratio/high_mean": 0.0011900486970262136, "clip_ratio/low_mean": 0.0014505546205327846, "clip_ratio/low_min": 0.00029041344441793626, "clip_ratio/region_mean": 0.002640603284817189, "epoch": 0.047319881700295746, "grad_norm": 0.11755881458520889, "learning_rate": 1e-06, "loss": 0.0575, "step": 507 }, { "clip_ratio/high_max": 0.002734697329287883, "clip_ratio/high_mean": 0.0012349998651188798, "clip_ratio/low_mean": 0.0015100925265869591, "clip_ratio/low_min": 8.998259727377445e-05, "clip_ratio/region_mean": 0.002745092337136157, "epoch": 0.04741321480029633, "grad_norm": 0.12109041213989258, "learning_rate": 1e-06, "loss": -0.0047, "step": 508 }, { "clip_ratio/high_max": 0.002644393032824155, "clip_ratio/high_mean": 0.0011360921525920276, "clip_ratio/low_mean": 0.0013230078657215927, "clip_ratio/low_min": 0.00015327829714806285, "clip_ratio/region_mean": 0.0024590999892097898, "epoch": 0.04750654790029692, "grad_norm": 0.11833036690950394, "learning_rate": 1e-06, "loss": 0.0366, "step": 509 }, { "clip_ratio/high_max": 0.0030175659630913287, "clip_ratio/high_mean": 0.0012385812806314789, "clip_ratio/low_mean": 0.0016291326901409775, "clip_ratio/low_min": 0.00031713534554000944, "clip_ratio/region_mean": 0.0028677139634964988, "epoch": 0.0475998810002975, "grad_norm": 0.11917825788259506, "learning_rate": 1e-06, "loss": 0.0518, "step": 510 }, { "clip_ratio/high_max": 0.0027569230005610734, "clip_ratio/high_mean": 0.0013071624598524068, "clip_ratio/low_mean": 0.0011490862343634944, "clip_ratio/low_min": 4.2066338664881187e-05, "clip_ratio/region_mean": 0.0024562487305956893, "epoch": 0.04769321410029808, "grad_norm": 0.11022017896175385, "learning_rate": 1e-06, "loss": 0.0022, "step": 511 }, { "clip_ratio/high_max": 0.0029090827665640973, "clip_ratio/high_mean": 0.001352490875433432, "clip_ratio/low_mean": 0.0011505107941047754, "clip_ratio/low_min": 7.641605043318123e-05, "clip_ratio/region_mean": 0.002503001618606504, "epoch": 0.047786547200298665, "grad_norm": 0.11806478351354599, "learning_rate": 1e-06, "loss": -0.0453, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013253348214285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 608.1181640625, "completions/mean_terminated_length": 561.2711791992188, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.04787988030029925, "grad_norm": 0.11305620521306992, "learning_rate": 1e-06, "loss": 0.0619, "num_tokens": 404459321.0, "reward": 0.5736868977546692, "reward_std": 0.20325154066085815, "rewards/simpleverify_reward/mean": 0.5736868977546692, "rewards/simpleverify_reward/std": 0.49454256892204285, "step": 513 }, { "clip_ratio/high_max": 0.0022938845941098407, "clip_ratio/high_mean": 0.0009037431773322169, "clip_ratio/low_mean": 0.000596653086176957, "clip_ratio/low_min": 1.528864959254861e-05, "clip_ratio/region_mean": 0.001500396290794015, "epoch": 0.047973213400299836, "grad_norm": 0.117484450340271, "learning_rate": 1e-06, "loss": 0.0128, "step": 514 }, { "clip_ratio/high_max": 0.0020708463780465536, "clip_ratio/high_mean": 0.0008220297531806864, "clip_ratio/low_mean": 0.000598522736254381, "clip_ratio/low_min": 3.841953457595082e-05, "clip_ratio/region_mean": 0.001420552478521131, "epoch": 0.048066546500300414, "grad_norm": 0.12512654066085815, "learning_rate": 1e-06, "loss": 0.0168, "step": 515 }, { "clip_ratio/high_max": 0.002422670215310063, "clip_ratio/high_mean": 0.0009986731438402785, "clip_ratio/low_mean": 0.0006161394485388882, "clip_ratio/low_min": 3.759869741770672e-05, "clip_ratio/region_mean": 0.001614812616026029, "epoch": 0.048159879600301, "grad_norm": 0.10640700906515121, "learning_rate": 1e-06, "loss": -0.0164, "step": 516 }, { "clip_ratio/high_max": 0.002371778835367877, "clip_ratio/high_mean": 0.0008489138144796016, "clip_ratio/low_mean": 0.0006929068076715339, "clip_ratio/low_min": 3.274192840763135e-05, "clip_ratio/region_mean": 0.0015418206094182096, "epoch": 0.048253212700301584, "grad_norm": 0.10331600159406662, "learning_rate": 1e-06, "loss": 0.0139, "step": 517 }, { "clip_ratio/high_max": 0.0020433677127584815, "clip_ratio/high_mean": 0.0010223066528851632, "clip_ratio/low_mean": 0.0009520077510387637, "clip_ratio/low_min": 0.00017556018701725407, "clip_ratio/region_mean": 0.0019743144293897785, "epoch": 0.04834654580030216, "grad_norm": 0.11191713809967041, "learning_rate": 1e-06, "loss": -0.0051, "step": 518 }, { "clip_ratio/high_max": 0.0022547683838638477, "clip_ratio/high_mean": 0.0009289155841543106, "clip_ratio/low_mean": 0.0009570534821250476, "clip_ratio/low_min": 6.77951884426875e-05, "clip_ratio/region_mean": 0.0018859690317185596, "epoch": 0.04843987890030275, "grad_norm": 0.11609487235546112, "learning_rate": 1e-06, "loss": 0.0236, "step": 519 }, { "clip_ratio/high_max": 0.001952126454852987, "clip_ratio/high_mean": 0.0008737843090784736, "clip_ratio/low_mean": 0.001069299403752666, "clip_ratio/low_min": 0.00014024691699887626, "clip_ratio/region_mean": 0.0019430837637628429, "epoch": 0.04853321200030333, "grad_norm": 0.10823985934257507, "learning_rate": 1e-06, "loss": 0.028, "step": 520 }, { "clip_ratio/high_max": 0.0022938304900890216, "clip_ratio/high_mean": 0.0008503883036610205, "clip_ratio/low_mean": 0.0011525513145898003, "clip_ratio/low_min": 0.00014091896809986793, "clip_ratio/region_mean": 0.0020029396837344393, "epoch": 0.04862654510030392, "grad_norm": 0.12348370254039764, "learning_rate": 1e-06, "loss": 0.0454, "step": 521 }, { "clip_ratio/high_max": 0.0020629136488423683, "clip_ratio/high_mean": 0.0008307058742502704, "clip_ratio/low_mean": 0.001059784619428683, "clip_ratio/low_min": 7.583248407172505e-05, "clip_ratio/region_mean": 0.0018904904572991654, "epoch": 0.048719878200304496, "grad_norm": 0.13124702870845795, "learning_rate": 1e-06, "loss": 0.0609, "step": 522 }, { "clip_ratio/high_max": 0.0024305735423695296, "clip_ratio/high_mean": 0.0009842896834015846, "clip_ratio/low_mean": 0.0009428224584553391, "clip_ratio/low_min": 0.00010360168289480498, "clip_ratio/region_mean": 0.0019271121127530932, "epoch": 0.04881321130030508, "grad_norm": 0.1202758252620697, "learning_rate": 1e-06, "loss": 0.0036, "step": 523 }, { "clip_ratio/high_max": 0.0021617880047415383, "clip_ratio/high_mean": 0.0010954533336189343, "clip_ratio/low_mean": 0.0007716032396274386, "clip_ratio/low_min": 4.3243162508588284e-05, "clip_ratio/region_mean": 0.001867056533228606, "epoch": 0.04890654440030567, "grad_norm": 0.11454305052757263, "learning_rate": 1e-06, "loss": -0.0066, "step": 524 }, { "clip_ratio/high_max": 0.002247830852866173, "clip_ratio/high_mean": 0.0009238106358679943, "clip_ratio/low_mean": 0.0008934875786508201, "clip_ratio/low_min": 5.8065073972102255e-05, "clip_ratio/region_mean": 0.0018172982090618461, "epoch": 0.04899987750030625, "grad_norm": 0.12081579864025116, "learning_rate": 1e-06, "loss": 0.0445, "step": 525 }, { "clip_ratio/high_max": 0.0026712418257375248, "clip_ratio/high_mean": 0.0010170068799197907, "clip_ratio/low_mean": 0.0007595505321660312, "clip_ratio/low_min": 5.329184659785824e-05, "clip_ratio/region_mean": 0.0017765574011718854, "epoch": 0.04909321060030683, "grad_norm": 0.12667986750602722, "learning_rate": 1e-06, "loss": 0.0074, "step": 526 }, { "clip_ratio/high_max": 0.0023257058855961077, "clip_ratio/high_mean": 0.000968858123087557, "clip_ratio/low_mean": 0.0008188400042854482, "clip_ratio/low_min": 4.377752520667855e-05, "clip_ratio/region_mean": 0.0017876981510198675, "epoch": 0.049186543700307415, "grad_norm": 0.12136593461036682, "learning_rate": 1e-06, "loss": 0.0112, "step": 527 }, { "clip_ratio/high_max": 0.002502967559848912, "clip_ratio/high_mean": 0.0009909093132591806, "clip_ratio/low_mean": 0.0008073075241554761, "clip_ratio/low_min": 4.310705526222591e-05, "clip_ratio/region_mean": 0.0017982168646994978, "epoch": 0.049279876800308, "grad_norm": 0.1273898333311081, "learning_rate": 1e-06, "loss": 0.0444, "step": 528 }, { "clip_ratio/high_max": 0.002444306570396293, "clip_ratio/high_mean": 0.0010627529554767534, "clip_ratio/low_mean": 0.0009356704722449649, "clip_ratio/low_min": 0.00012519238589447923, "clip_ratio/region_mean": 0.0019984234604635276, "epoch": 0.049373209900308586, "grad_norm": 0.11384916305541992, "learning_rate": 1e-06, "loss": 0.0214, "step": 529 }, { "clip_ratio/high_max": 0.0019527282674971502, "clip_ratio/high_mean": 0.0007767343377054203, "clip_ratio/low_mean": 0.0009547128865960985, "clip_ratio/low_min": 6.039664094714681e-05, "clip_ratio/region_mean": 0.0017314472279394977, "epoch": 0.049466543000309164, "grad_norm": 0.10346849262714386, "learning_rate": 1e-06, "loss": 0.035, "step": 530 }, { "clip_ratio/high_max": 0.002386239582847338, "clip_ratio/high_mean": 0.0009497268874838483, "clip_ratio/low_mean": 0.0010229159706796054, "clip_ratio/low_min": 3.5132095945300534e-05, "clip_ratio/region_mean": 0.001972642829059623, "epoch": 0.04955987610030975, "grad_norm": 0.11370111256837845, "learning_rate": 1e-06, "loss": 0.0139, "step": 531 }, { "clip_ratio/high_max": 0.0025752184228622355, "clip_ratio/high_mean": 0.0010965920228045434, "clip_ratio/low_mean": 0.0011692429725371767, "clip_ratio/low_min": 6.39326790405903e-05, "clip_ratio/region_mean": 0.0022658350571873598, "epoch": 0.049653209200310335, "grad_norm": 0.12252366542816162, "learning_rate": 1e-06, "loss": -0.0088, "step": 532 }, { "clip_ratio/high_max": 0.0025580211367923766, "clip_ratio/high_mean": 0.0010781207129184622, "clip_ratio/low_mean": 0.0011872038230649196, "clip_ratio/low_min": 4.8454917305207346e-05, "clip_ratio/region_mean": 0.0022653245760011487, "epoch": 0.04974654230031091, "grad_norm": 0.12165111303329468, "learning_rate": 1e-06, "loss": 0.001, "step": 533 }, { "clip_ratio/high_max": 0.002439030482491944, "clip_ratio/high_mean": 0.0009942480191966752, "clip_ratio/low_mean": 0.001273912737815408, "clip_ratio/low_min": 0.00016214824245253112, "clip_ratio/region_mean": 0.00226816076610703, "epoch": 0.0498398754003115, "grad_norm": 0.11765293776988983, "learning_rate": 1e-06, "loss": 0.0168, "step": 534 }, { "clip_ratio/high_max": 0.002474566863384098, "clip_ratio/high_mean": 0.0009806419529923005, "clip_ratio/low_mean": 0.0011716983208316378, "clip_ratio/low_min": 0.0001299355735682184, "clip_ratio/region_mean": 0.0021523402829188854, "epoch": 0.04993320850031208, "grad_norm": 0.12225670367479324, "learning_rate": 1e-06, "loss": 0.0325, "step": 535 }, { "clip_ratio/high_max": 0.0024211660274886526, "clip_ratio/high_mean": 0.0009722914601297816, "clip_ratio/low_mean": 0.001114476483053295, "clip_ratio/low_min": 5.219522245170083e-05, "clip_ratio/region_mean": 0.0020867679340881296, "epoch": 0.05002654160031267, "grad_norm": 0.1208510547876358, "learning_rate": 1e-06, "loss": 0.0303, "step": 536 }, { "clip_ratio/high_max": 0.002375648356974125, "clip_ratio/high_mean": 0.0009706059518066468, "clip_ratio/low_mean": 0.000983849327894859, "clip_ratio/low_min": 3.261325400671922e-05, "clip_ratio/region_mean": 0.0019544552997103892, "epoch": 0.05011987470031325, "grad_norm": 0.10823806375265121, "learning_rate": 1e-06, "loss": -0.0116, "step": 537 }, { "clip_ratio/high_max": 0.002742798660619883, "clip_ratio/high_mean": 0.0010465313534950837, "clip_ratio/low_mean": 0.0011149627734994283, "clip_ratio/low_min": 9.473226782574784e-05, "clip_ratio/region_mean": 0.002161494121537544, "epoch": 0.05021320780031383, "grad_norm": 0.10242088884115219, "learning_rate": 1e-06, "loss": 0.0558, "step": 538 }, { "clip_ratio/high_max": 0.0026951371401082724, "clip_ratio/high_mean": 0.0010893955768551677, "clip_ratio/low_mean": 0.001053309541021008, "clip_ratio/low_min": 9.017143202072475e-05, "clip_ratio/region_mean": 0.0021427051469800062, "epoch": 0.05030654090031442, "grad_norm": 0.11764271557331085, "learning_rate": 1e-06, "loss": 0.0271, "step": 539 }, { "clip_ratio/high_max": 0.0023856143525335938, "clip_ratio/high_mean": 0.0009541919480398064, "clip_ratio/low_mean": 0.0012469342036638409, "clip_ratio/low_min": 0.00019069206791755278, "clip_ratio/region_mean": 0.0022011262335581705, "epoch": 0.050399874000315, "grad_norm": 0.5091419816017151, "learning_rate": 1e-06, "loss": 0.0784, "step": 540 }, { "clip_ratio/high_max": 0.002293923324032221, "clip_ratio/high_mean": 0.001104503415263025, "clip_ratio/low_mean": 0.0010798866460390855, "clip_ratio/low_min": 6.423753711715108e-05, "clip_ratio/region_mean": 0.002184390090405941, "epoch": 0.05049320710031558, "grad_norm": 0.11446491628885269, "learning_rate": 1e-06, "loss": 0.0026, "step": 541 }, { "clip_ratio/high_max": 0.002394167640886735, "clip_ratio/high_mean": 0.00119633993745083, "clip_ratio/low_mean": 0.0010826653706317302, "clip_ratio/low_min": 0.00015781784532009624, "clip_ratio/region_mean": 0.0022790052971686237, "epoch": 0.050586540200316166, "grad_norm": 0.12535390257835388, "learning_rate": 1e-06, "loss": 0.0051, "step": 542 }, { "clip_ratio/high_max": 0.002593322533357423, "clip_ratio/high_mean": 0.0010485982857062481, "clip_ratio/low_mean": 0.0011854558179038577, "clip_ratio/low_min": 9.415505519427825e-05, "clip_ratio/region_mean": 0.0022340541254379787, "epoch": 0.05067987330031675, "grad_norm": 0.11848200112581253, "learning_rate": 1e-06, "loss": 0.0445, "step": 543 }, { "clip_ratio/high_max": 0.002403954124019947, "clip_ratio/high_mean": 0.001005131289275596, "clip_ratio/low_mean": 0.0010457935495651327, "clip_ratio/low_min": 0.00012072416757291649, "clip_ratio/region_mean": 0.002050924827926792, "epoch": 0.05077320640031733, "grad_norm": 0.11786353588104248, "learning_rate": 1e-06, "loss": -0.0262, "step": 544 }, { "clip_ratio/high_max": 0.0026527815680310596, "clip_ratio/high_mean": 0.0010455009960423922, "clip_ratio/low_mean": 0.0011517081475176383, "clip_ratio/low_min": 0.00015454444564966252, "clip_ratio/region_mean": 0.002197209141741041, "epoch": 0.050866539500317914, "grad_norm": 0.11207284033298492, "learning_rate": 1e-06, "loss": 0.0043, "step": 545 }, { "clip_ratio/high_max": 0.00240755571576301, "clip_ratio/high_mean": 0.001150607396994019, "clip_ratio/low_mean": 0.0013976555856061168, "clip_ratio/low_min": 0.00018781676044454798, "clip_ratio/region_mean": 0.0025482630589976907, "epoch": 0.0509598726003185, "grad_norm": 0.11939078569412231, "learning_rate": 1e-06, "loss": 0.0234, "step": 546 }, { "clip_ratio/high_max": 0.0026024213366326876, "clip_ratio/high_mean": 0.00108860780892428, "clip_ratio/low_mean": 0.0013370455544645665, "clip_ratio/low_min": 0.0001822290250856895, "clip_ratio/region_mean": 0.0024256533215520903, "epoch": 0.051053205700319085, "grad_norm": 0.11460070312023163, "learning_rate": 1e-06, "loss": 0.0282, "step": 547 }, { "clip_ratio/high_max": 0.0025708858338475693, "clip_ratio/high_mean": 0.0010650688191162772, "clip_ratio/low_mean": 0.0013487387805071194, "clip_ratio/low_min": 0.00010173968257731758, "clip_ratio/region_mean": 0.0024138076114468277, "epoch": 0.05114653880031966, "grad_norm": 0.11298304051160812, "learning_rate": 1e-06, "loss": 0.0066, "step": 548 }, { "clip_ratio/high_max": 0.0026097415284311865, "clip_ratio/high_mean": 0.0012252685492057935, "clip_ratio/low_mean": 0.0012879170790256467, "clip_ratio/low_min": 0.00017008917893690523, "clip_ratio/region_mean": 0.002513185601856094, "epoch": 0.05123987190032025, "grad_norm": 0.13417081534862518, "learning_rate": 1e-06, "loss": -0.0199, "step": 549 }, { "clip_ratio/high_max": 0.0027926547700189985, "clip_ratio/high_mean": 0.0012437009827408474, "clip_ratio/low_mean": 0.001228071336299763, "clip_ratio/low_min": 4.769744191435166e-05, "clip_ratio/region_mean": 0.0024717723281355575, "epoch": 0.05133320500032083, "grad_norm": 0.12110395729541779, "learning_rate": 1e-06, "loss": 0.0161, "step": 550 }, { "clip_ratio/high_max": 0.002581809188995976, "clip_ratio/high_mean": 0.0011146844881295692, "clip_ratio/low_mean": 0.001317385849688435, "clip_ratio/low_min": 0.00015514890856138663, "clip_ratio/region_mean": 0.0024320702941622585, "epoch": 0.05142653810032142, "grad_norm": 0.12100648134946823, "learning_rate": 1e-06, "loss": 0.0404, "step": 551 }, { "clip_ratio/high_max": 0.0024883095029508695, "clip_ratio/high_mean": 0.001081148293451406, "clip_ratio/low_mean": 0.0011426204418967245, "clip_ratio/low_min": 0.00013123269854986575, "clip_ratio/region_mean": 0.0022237687444430776, "epoch": 0.051519871200322, "grad_norm": 0.1108996719121933, "learning_rate": 1e-06, "loss": -0.0131, "step": 552 }, { "clip_ratio/high_max": 0.0025357191989314742, "clip_ratio/high_mean": 0.0010701419378165156, "clip_ratio/low_mean": 0.001349059271888109, "clip_ratio/low_min": 0.0001485577886342071, "clip_ratio/region_mean": 0.0024192011696868576, "epoch": 0.05161320430032258, "grad_norm": 0.11375448852777481, "learning_rate": 1e-06, "loss": 0.0044, "step": 553 }, { "clip_ratio/high_max": 0.0027622483248705976, "clip_ratio/high_mean": 0.0012032637969241478, "clip_ratio/low_mean": 0.001359804708044976, "clip_ratio/low_min": 0.0001637648656469537, "clip_ratio/region_mean": 0.002563068541348912, "epoch": 0.05170653740032317, "grad_norm": 0.5694599151611328, "learning_rate": 1e-06, "loss": 0.0026, "step": 554 }, { "clip_ratio/high_max": 0.0025685182481538504, "clip_ratio/high_mean": 0.0010880275949602947, "clip_ratio/low_mean": 0.0012558269481814932, "clip_ratio/low_min": 2.3918866645544767e-05, "clip_ratio/region_mean": 0.0023438544958480634, "epoch": 0.05179987050032375, "grad_norm": 0.11959235370159149, "learning_rate": 1e-06, "loss": 0.0456, "step": 555 }, { "clip_ratio/high_max": 0.0023812199142412283, "clip_ratio/high_mean": 0.0009857916102191666, "clip_ratio/low_mean": 0.0014461428127106046, "clip_ratio/low_min": 0.00014719656837769435, "clip_ratio/region_mean": 0.002431934430205729, "epoch": 0.05189320360032433, "grad_norm": 0.11535393446683884, "learning_rate": 1e-06, "loss": 0.0509, "step": 556 }, { "clip_ratio/high_max": 0.0023443118916475214, "clip_ratio/high_mean": 0.0010782462813949678, "clip_ratio/low_mean": 0.0012639374454010976, "clip_ratio/low_min": 9.613549627829343e-05, "clip_ratio/region_mean": 0.002342183761356864, "epoch": 0.051986536700324916, "grad_norm": 0.10409792512655258, "learning_rate": 1e-06, "loss": 0.0026, "step": 557 }, { "clip_ratio/high_max": 0.0026739557142718695, "clip_ratio/high_mean": 0.0011256642337684752, "clip_ratio/low_mean": 0.001414404370734701, "clip_ratio/low_min": 0.00015633211114618462, "clip_ratio/region_mean": 0.002540068577218335, "epoch": 0.0520798698003255, "grad_norm": 0.11572989076375961, "learning_rate": 1e-06, "loss": 0.0418, "step": 558 }, { "clip_ratio/high_max": 0.002377290184085723, "clip_ratio/high_mean": 0.00114520425995579, "clip_ratio/low_mean": 0.0012214684902573936, "clip_ratio/low_min": 0.00016913914623728488, "clip_ratio/region_mean": 0.0023666727356612682, "epoch": 0.05217320290032608, "grad_norm": 0.11622529476881027, "learning_rate": 1e-06, "loss": -0.016, "step": 559 }, { "clip_ratio/high_max": 0.0027728189670597203, "clip_ratio/high_mean": 0.0011612942726060282, "clip_ratio/low_mean": 0.0013770258738077246, "clip_ratio/low_min": 0.00013348496577236801, "clip_ratio/region_mean": 0.0025383201718796045, "epoch": 0.052266536000326665, "grad_norm": 0.12185611575841904, "learning_rate": 1e-06, "loss": 0.0078, "step": 560 }, { "clip_ratio/high_max": 0.0027572427861741744, "clip_ratio/high_mean": 0.0011848831272800453, "clip_ratio/low_mean": 0.0014325601878226735, "clip_ratio/low_min": 0.0001359142806904856, "clip_ratio/region_mean": 0.0026174432859988883, "epoch": 0.05235986910032725, "grad_norm": 0.11497235298156738, "learning_rate": 1e-06, "loss": 0.0216, "step": 561 }, { "clip_ratio/high_max": 0.0026100702743860893, "clip_ratio/high_mean": 0.0011189342512807343, "clip_ratio/low_mean": 0.0012487166241044179, "clip_ratio/low_min": 0.0002088825299324526, "clip_ratio/region_mean": 0.0023676509226788767, "epoch": 0.052453202200327835, "grad_norm": 0.11312676966190338, "learning_rate": 1e-06, "loss": 0.0163, "step": 562 }, { "clip_ratio/high_max": 0.0027594675339059904, "clip_ratio/high_mean": 0.0011417407695262227, "clip_ratio/low_mean": 0.0013484936607710551, "clip_ratio/low_min": 7.538398403994506e-05, "clip_ratio/region_mean": 0.0024902344230213203, "epoch": 0.05254653530032841, "grad_norm": 0.12057051807641983, "learning_rate": 1e-06, "loss": 0.0549, "step": 563 }, { "clip_ratio/high_max": 0.0025116864489973523, "clip_ratio/high_mean": 0.0011374509158486035, "clip_ratio/low_mean": 0.0011922353878617287, "clip_ratio/low_min": 5.325620077201165e-05, "clip_ratio/region_mean": 0.0023296863146242686, "epoch": 0.052639868400329, "grad_norm": 0.11815297603607178, "learning_rate": 1e-06, "loss": -0.0028, "step": 564 }, { "clip_ratio/high_max": 0.0022530104979523458, "clip_ratio/high_mean": 0.0009999750145652797, "clip_ratio/low_mean": 0.001297568465815857, "clip_ratio/low_min": 5.7107778957288247e-05, "clip_ratio/region_mean": 0.0022975434621912427, "epoch": 0.052733201500329584, "grad_norm": 0.11339268833398819, "learning_rate": 1e-06, "loss": 0.0452, "step": 565 }, { "clip_ratio/high_max": 0.00248011741132359, "clip_ratio/high_mean": 0.0011184336144651752, "clip_ratio/low_mean": 0.001197814883198589, "clip_ratio/low_min": 9.738258540892275e-05, "clip_ratio/region_mean": 0.002316248464921955, "epoch": 0.05282653460033017, "grad_norm": 0.1128305122256279, "learning_rate": 1e-06, "loss": 0.0291, "step": 566 }, { "clip_ratio/high_max": 0.0026540504331933334, "clip_ratio/high_mean": 0.001125739247072488, "clip_ratio/low_mean": 0.0010907519463216886, "clip_ratio/low_min": 6.852324986539315e-05, "clip_ratio/region_mean": 0.0022164912370499223, "epoch": 0.05291986770033075, "grad_norm": 0.10074286162853241, "learning_rate": 1e-06, "loss": 0.023, "step": 567 }, { "clip_ratio/high_max": 0.002943443407275481, "clip_ratio/high_mean": 0.0012677842987613985, "clip_ratio/low_mean": 0.0012342028094280977, "clip_ratio/low_min": 9.10896087589208e-05, "clip_ratio/region_mean": 0.0025019871100084856, "epoch": 0.05301320080033133, "grad_norm": 0.11268115043640137, "learning_rate": 1e-06, "loss": -0.0035, "step": 568 }, { "clip_ratio/high_max": 0.0030965563346398994, "clip_ratio/high_mean": 0.0012327997974352911, "clip_ratio/low_mean": 0.0013548318529501557, "clip_ratio/low_min": 7.121752241801005e-05, "clip_ratio/region_mean": 0.00258763170131715, "epoch": 0.05310653390033192, "grad_norm": 0.11782576143741608, "learning_rate": 1e-06, "loss": 0.0143, "step": 569 }, { "clip_ratio/high_max": 0.0024675445092725568, "clip_ratio/high_mean": 0.001130588338128291, "clip_ratio/low_mean": 0.0013564268774644006, "clip_ratio/low_min": 8.581010752095608e-05, "clip_ratio/region_mean": 0.00248701519012684, "epoch": 0.053199867000332496, "grad_norm": 0.10595322400331497, "learning_rate": 1e-06, "loss": 0.0081, "step": 570 }, { "clip_ratio/high_max": 0.0028545003006001934, "clip_ratio/high_mean": 0.0012170638947281986, "clip_ratio/low_mean": 0.0011107133177574724, "clip_ratio/low_min": 6.0930760810151696e-05, "clip_ratio/region_mean": 0.0023277771688299254, "epoch": 0.05329320010033308, "grad_norm": 0.11617729067802429, "learning_rate": 1e-06, "loss": 0.0054, "step": 571 }, { "clip_ratio/high_max": 0.0027577823784668, "clip_ratio/high_mean": 0.001325263536273269, "clip_ratio/low_mean": 0.0010861684058909304, "clip_ratio/low_min": 9.476870582147967e-05, "clip_ratio/region_mean": 0.0024114319603540935, "epoch": 0.053386533200333666, "grad_norm": 0.1142783835530281, "learning_rate": 1e-06, "loss": 0.0058, "step": 572 }, { "clip_ratio/high_max": 0.00254910382500384, "clip_ratio/high_mean": 0.0010939086096186657, "clip_ratio/low_mean": 0.0012719541082333308, "clip_ratio/low_min": 4.8896016778599005e-05, "clip_ratio/region_mean": 0.002365862703300081, "epoch": 0.05347986630033425, "grad_norm": 4.01998233795166, "learning_rate": 1e-06, "loss": 0.0517, "step": 573 }, { "clip_ratio/high_max": 0.0024419658875558525, "clip_ratio/high_mean": 0.0011289127105555963, "clip_ratio/low_mean": 0.0013267945359984878, "clip_ratio/low_min": 0.00014345609451993369, "clip_ratio/region_mean": 0.0024557072974857874, "epoch": 0.05357319940033483, "grad_norm": 0.12506824731826782, "learning_rate": 1e-06, "loss": 0.0342, "step": 574 }, { "clip_ratio/high_max": 0.0032088188527268358, "clip_ratio/high_mean": 0.0014075598355702823, "clip_ratio/low_mean": 0.0013373718538787216, "clip_ratio/low_min": 0.0002551303150539752, "clip_ratio/region_mean": 0.0027449316839920357, "epoch": 0.053666532500335415, "grad_norm": 0.14207856357097626, "learning_rate": 1e-06, "loss": 0.0253, "step": 575 }, { "clip_ratio/high_max": 0.002352807998249773, "clip_ratio/high_mean": 0.000967294190559187, "clip_ratio/low_mean": 0.0013993982320243958, "clip_ratio/low_min": 0.0002421751614747336, "clip_ratio/region_mean": 0.0023666924389544874, "epoch": 0.053759865600336, "grad_norm": 0.17422142624855042, "learning_rate": 1e-06, "loss": 0.0424, "step": 576 }, { "clip_ratio/high_max": 0.002820875815814361, "clip_ratio/high_mean": 0.00132078929891577, "clip_ratio/low_mean": 0.0012677728991548065, "clip_ratio/low_min": 3.611976899264846e-05, "clip_ratio/region_mean": 0.0025885622017085552, "epoch": 0.053853198700336585, "grad_norm": 0.1095849797129631, "learning_rate": 1e-06, "loss": 0.0011, "step": 577 }, { "clip_ratio/high_max": 0.002381853155384306, "clip_ratio/high_mean": 0.0009803358170756837, "clip_ratio/low_mean": 0.001393428690789733, "clip_ratio/low_min": 0.000167344353030785, "clip_ratio/region_mean": 0.0023737644514767453, "epoch": 0.053946531800337164, "grad_norm": 0.10863973945379257, "learning_rate": 1e-06, "loss": 0.0661, "step": 578 }, { "clip_ratio/high_max": 0.0026694867701735348, "clip_ratio/high_mean": 0.0011427963618189096, "clip_ratio/low_mean": 0.0014050775534997229, "clip_ratio/low_min": 0.00018719708896242082, "clip_ratio/region_mean": 0.002547873795265332, "epoch": 0.05403986490033775, "grad_norm": 0.11603222787380219, "learning_rate": 1e-06, "loss": 0.043, "step": 579 }, { "clip_ratio/high_max": 0.003046132915187627, "clip_ratio/high_mean": 0.001369683272059774, "clip_ratio/low_mean": 0.001360263649985427, "clip_ratio/low_min": 0.00024008280888665468, "clip_ratio/region_mean": 0.0027299469657009467, "epoch": 0.054133198000338334, "grad_norm": 0.12285274267196655, "learning_rate": 1e-06, "loss": 0.0466, "step": 580 }, { "clip_ratio/high_max": 0.003033043903997168, "clip_ratio/high_mean": 0.0012858603586209938, "clip_ratio/low_mean": 0.0012211418252263684, "clip_ratio/low_min": 0.00012805784808733733, "clip_ratio/region_mean": 0.002507002187485341, "epoch": 0.05422653110033892, "grad_norm": 0.13200485706329346, "learning_rate": 1e-06, "loss": -0.0044, "step": 581 }, { "clip_ratio/high_max": 0.002537907668738626, "clip_ratio/high_mean": 0.0011251911018916871, "clip_ratio/low_mean": 0.001203045088914223, "clip_ratio/low_min": 9.355846486869268e-05, "clip_ratio/region_mean": 0.0023282361653400585, "epoch": 0.0543198642003395, "grad_norm": 0.11725179851055145, "learning_rate": 1e-06, "loss": 0.0117, "step": 582 }, { "clip_ratio/high_max": 0.0027843005445902236, "clip_ratio/high_mean": 0.0011539477782207541, "clip_ratio/low_mean": 0.0011952742042922182, "clip_ratio/low_min": 9.549087644700194e-05, "clip_ratio/region_mean": 0.002349221962504089, "epoch": 0.05441319730034008, "grad_norm": 0.1376514434814453, "learning_rate": 1e-06, "loss": 0.0452, "step": 583 }, { "clip_ratio/high_max": 0.0031040568646858446, "clip_ratio/high_mean": 0.0012525530037237331, "clip_ratio/low_mean": 0.001269270591365057, "clip_ratio/low_min": 0.0001405190305376891, "clip_ratio/region_mean": 0.0025218236332875676, "epoch": 0.05450653040034067, "grad_norm": 0.10900263488292694, "learning_rate": 1e-06, "loss": 0.0408, "step": 584 }, { "clip_ratio/high_max": 0.002753465436398983, "clip_ratio/high_mean": 0.0011064121936215088, "clip_ratio/low_mean": 0.0013788378055323847, "clip_ratio/low_min": 0.0002352017154407804, "clip_ratio/region_mean": 0.0024852499409462325, "epoch": 0.054599863500341246, "grad_norm": 0.16603875160217285, "learning_rate": 1e-06, "loss": 0.0496, "step": 585 }, { "clip_ratio/high_max": 0.0023427415944752283, "clip_ratio/high_mean": 0.0011698730486386921, "clip_ratio/low_mean": 0.0011950733241974376, "clip_ratio/low_min": 0.00013395195492194034, "clip_ratio/region_mean": 0.0023649463619221933, "epoch": 0.05469319660034183, "grad_norm": 0.11876308917999268, "learning_rate": 1e-06, "loss": 0.0114, "step": 586 }, { "clip_ratio/high_max": 0.0025814474720391445, "clip_ratio/high_mean": 0.0010721033577283379, "clip_ratio/low_mean": 0.0015546890717814676, "clip_ratio/low_min": 0.00011013074436050374, "clip_ratio/region_mean": 0.0026267923967679963, "epoch": 0.054786529700342416, "grad_norm": 0.1272927224636078, "learning_rate": 1e-06, "loss": 0.0536, "step": 587 }, { "clip_ratio/high_max": 0.0025365491092088632, "clip_ratio/high_mean": 0.0012100449202989694, "clip_ratio/low_mean": 0.0013668191568285692, "clip_ratio/low_min": 0.0001328071994066704, "clip_ratio/region_mean": 0.0025768641789909452, "epoch": 0.054879862800343, "grad_norm": 0.1082814484834671, "learning_rate": 1e-06, "loss": 0.0392, "step": 588 }, { "clip_ratio/high_max": 0.0032100252574309707, "clip_ratio/high_mean": 0.001355565051198937, "clip_ratio/low_mean": 0.0014500094184768386, "clip_ratio/low_min": 8.803711807559012e-05, "clip_ratio/region_mean": 0.002805574498779606, "epoch": 0.05497319590034358, "grad_norm": 0.11585516482591629, "learning_rate": 1e-06, "loss": -0.032, "step": 589 }, { "clip_ratio/high_max": 0.002697335039556492, "clip_ratio/high_mean": 0.001279321004403755, "clip_ratio/low_mean": 0.0014537881543219555, "clip_ratio/low_min": 0.00021021044358349172, "clip_ratio/region_mean": 0.0027331091550877318, "epoch": 0.055066529000344165, "grad_norm": 0.12308011949062347, "learning_rate": 1e-06, "loss": 0.0241, "step": 590 }, { "clip_ratio/high_max": 0.0030548342838301323, "clip_ratio/high_mean": 0.001186602905363543, "clip_ratio/low_mean": 0.0012894511673948728, "clip_ratio/low_min": 9.116056571656372e-05, "clip_ratio/region_mean": 0.0024760540254646912, "epoch": 0.05515986210034475, "grad_norm": 0.12500542402267456, "learning_rate": 1e-06, "loss": 0.0453, "step": 591 }, { "clip_ratio/high_max": 0.002761060077318689, "clip_ratio/high_mean": 0.001086662981833797, "clip_ratio/low_mean": 0.0012974886922165751, "clip_ratio/low_min": 4.474154229683336e-05, "clip_ratio/region_mean": 0.002384151695878245, "epoch": 0.055253195200345336, "grad_norm": 0.1094045415520668, "learning_rate": 1e-06, "loss": 0.0615, "step": 592 }, { "clip_ratio/high_max": 0.002913882060965989, "clip_ratio/high_mean": 0.0014260840471251868, "clip_ratio/low_mean": 0.0013511558681784663, "clip_ratio/low_min": 0.00013532372940971982, "clip_ratio/region_mean": 0.00277723995532142, "epoch": 0.055346528300345914, "grad_norm": 0.12525342404842377, "learning_rate": 1e-06, "loss": 0.0005, "step": 593 }, { "clip_ratio/high_max": 0.0024594295682618394, "clip_ratio/high_mean": 0.0010692717914935201, "clip_ratio/low_mean": 0.0016086845644167624, "clip_ratio/low_min": 0.00039245718835445587, "clip_ratio/region_mean": 0.002677956370462198, "epoch": 0.0554398614003465, "grad_norm": 56.352691650390625, "learning_rate": 1e-06, "loss": 0.0616, "step": 594 }, { "clip_ratio/high_max": 0.0029018530767643824, "clip_ratio/high_mean": 0.0012118536251364276, "clip_ratio/low_mean": 0.00147406952601159, "clip_ratio/low_min": 8.366415204363875e-05, "clip_ratio/region_mean": 0.0026859231656999327, "epoch": 0.055533194500347084, "grad_norm": 0.12930914759635925, "learning_rate": 1e-06, "loss": 0.0326, "step": 595 }, { "clip_ratio/high_max": 0.002946642227470875, "clip_ratio/high_mean": 0.001237690965353977, "clip_ratio/low_mean": 0.001491498351242626, "clip_ratio/low_min": 0.00020836423846049001, "clip_ratio/region_mean": 0.0027291893420624547, "epoch": 0.05562652760034766, "grad_norm": 0.1227770671248436, "learning_rate": 1e-06, "loss": 0.0001, "step": 596 }, { "clip_ratio/high_max": 0.002995901209942531, "clip_ratio/high_mean": 0.0012675451253016945, "clip_ratio/low_mean": 0.0013838087652402464, "clip_ratio/low_min": 0.000258805377598037, "clip_ratio/region_mean": 0.0026513538978178985, "epoch": 0.05571986070034825, "grad_norm": 0.10995718091726303, "learning_rate": 1e-06, "loss": 0.0032, "step": 597 }, { "clip_ratio/high_max": 0.002594681085611228, "clip_ratio/high_mean": 0.0011624784929153975, "clip_ratio/low_mean": 0.0014592079423891846, "clip_ratio/low_min": 0.0002578041221568128, "clip_ratio/region_mean": 0.0026216864498564973, "epoch": 0.05581319380034883, "grad_norm": 0.11237485706806183, "learning_rate": 1e-06, "loss": 0.0488, "step": 598 }, { "clip_ratio/high_max": 0.002932543524366338, "clip_ratio/high_mean": 0.001473056930990424, "clip_ratio/low_mean": 0.0014338534601847641, "clip_ratio/low_min": 0.0002242224545625504, "clip_ratio/region_mean": 0.0029069104202790186, "epoch": 0.05590652690034942, "grad_norm": 0.1207275539636612, "learning_rate": 1e-06, "loss": -0.0014, "step": 599 }, { "clip_ratio/high_max": 0.003175174104399048, "clip_ratio/high_mean": 0.0012610718913492747, "clip_ratio/low_mean": 0.0012987626614631154, "clip_ratio/low_min": 0.0001589283410794451, "clip_ratio/region_mean": 0.0025598346401238814, "epoch": 0.055999860000349996, "grad_norm": 0.11774420738220215, "learning_rate": 1e-06, "loss": -0.0118, "step": 600 }, { "clip_ratio/high_max": 0.003017504022864159, "clip_ratio/high_mean": 0.0012930801167385653, "clip_ratio/low_mean": 0.0013243357425380964, "clip_ratio/low_min": 0.0001299791383644333, "clip_ratio/region_mean": 0.002617415797431022, "epoch": 0.05609319310035058, "grad_norm": 0.10962067544460297, "learning_rate": 1e-06, "loss": -0.0033, "step": 601 }, { "clip_ratio/high_max": 0.002408566484518815, "clip_ratio/high_mean": 0.0011331773057463579, "clip_ratio/low_mean": 0.001511076607130235, "clip_ratio/low_min": 0.00019113015514449216, "clip_ratio/region_mean": 0.002644253909238614, "epoch": 0.05618652620035117, "grad_norm": 0.11985189467668533, "learning_rate": 1e-06, "loss": 0.0242, "step": 602 }, { "clip_ratio/high_max": 0.0029132041236152872, "clip_ratio/high_mean": 0.001350120499409968, "clip_ratio/low_mean": 0.0014923360940883867, "clip_ratio/low_min": 0.00021058001129858894, "clip_ratio/region_mean": 0.002842456553480588, "epoch": 0.05627985930035175, "grad_norm": 0.11751165986061096, "learning_rate": 1e-06, "loss": 0.0063, "step": 603 }, { "clip_ratio/high_max": 0.0033193147683050483, "clip_ratio/high_mean": 0.0012864779091614764, "clip_ratio/low_mean": 0.0014792458059673663, "clip_ratio/low_min": 0.00015850120144023094, "clip_ratio/region_mean": 0.0027657236860250123, "epoch": 0.05637319240035233, "grad_norm": 0.1329202950000763, "learning_rate": 1e-06, "loss": 0.0033, "step": 604 }, { "clip_ratio/high_max": 0.0031351850266219117, "clip_ratio/high_mean": 0.0013310584363352973, "clip_ratio/low_mean": 0.0017237356369150802, "clip_ratio/low_min": 0.00024564145769545576, "clip_ratio/region_mean": 0.0030547940696123987, "epoch": 0.056466525500352915, "grad_norm": 0.11847762018442154, "learning_rate": 1e-06, "loss": 0.0426, "step": 605 }, { "clip_ratio/high_max": 0.002764619726804085, "clip_ratio/high_mean": 0.0011567974725039676, "clip_ratio/low_mean": 0.00151305268809665, "clip_ratio/low_min": 0.00012714555305137765, "clip_ratio/region_mean": 0.0026698501751525328, "epoch": 0.0565598586003535, "grad_norm": 0.11249374598264694, "learning_rate": 1e-06, "loss": 0.0405, "step": 606 }, { "clip_ratio/high_max": 0.0031606808697688393, "clip_ratio/high_mean": 0.0012830835257773288, "clip_ratio/low_mean": 0.0014400148429558612, "clip_ratio/low_min": 0.00024713687707844656, "clip_ratio/region_mean": 0.002723098346905317, "epoch": 0.056653191700354086, "grad_norm": 0.11802874505519867, "learning_rate": 1e-06, "loss": 0.0183, "step": 607 }, { "clip_ratio/high_max": 0.003382972688996233, "clip_ratio/high_mean": 0.0013577564604929648, "clip_ratio/low_mean": 0.0014644498041889165, "clip_ratio/low_min": 0.00012427875026332913, "clip_ratio/region_mean": 0.002822206217388157, "epoch": 0.056746524800354664, "grad_norm": 0.11685334146022797, "learning_rate": 1e-06, "loss": 0.0238, "step": 608 }, { "clip_ratio/high_max": 0.002916577403084375, "clip_ratio/high_mean": 0.0012009530437353533, "clip_ratio/low_mean": 0.0015385350925498642, "clip_ratio/low_min": 0.00014482042115560034, "clip_ratio/region_mean": 0.002739488161751069, "epoch": 0.05683985790035525, "grad_norm": 0.13132593035697937, "learning_rate": 1e-06, "loss": 0.0021, "step": 609 }, { "clip_ratio/high_max": 0.002846942596079316, "clip_ratio/high_mean": 0.0012231544424139429, "clip_ratio/low_mean": 0.0018343539195484482, "clip_ratio/low_min": 0.00025931542040780187, "clip_ratio/region_mean": 0.0030575084092561156, "epoch": 0.056933191000355834, "grad_norm": 0.12039321660995483, "learning_rate": 1e-06, "loss": 0.058, "step": 610 }, { "clip_ratio/high_max": 0.0024926215992309153, "clip_ratio/high_mean": 0.0011125080909550888, "clip_ratio/low_mean": 0.0014655374761787243, "clip_ratio/low_min": 5.463968773256056e-05, "clip_ratio/region_mean": 0.00257804557622876, "epoch": 0.05702652410035641, "grad_norm": 0.10390654951334, "learning_rate": 1e-06, "loss": 0.0349, "step": 611 }, { "clip_ratio/high_max": 0.0028619467266253196, "clip_ratio/high_mean": 0.0012707455280178692, "clip_ratio/low_mean": 0.001434564601368038, "clip_ratio/low_min": 0.00012798572970496025, "clip_ratio/region_mean": 0.002705310173041653, "epoch": 0.057119857200357, "grad_norm": 0.11988730728626251, "learning_rate": 1e-06, "loss": 0.0246, "step": 612 }, { "clip_ratio/high_max": 0.0029487076026271097, "clip_ratio/high_mean": 0.0013413972046691924, "clip_ratio/low_mean": 0.0015923161372484174, "clip_ratio/low_min": 7.876181825849926e-05, "clip_ratio/region_mean": 0.0029337133528315462, "epoch": 0.05721319030035758, "grad_norm": 0.12389518320560455, "learning_rate": 1e-06, "loss": 0.0066, "step": 613 }, { "clip_ratio/high_max": 0.0031791578949196264, "clip_ratio/high_mean": 0.0013678569448529743, "clip_ratio/low_mean": 0.0015660199242120143, "clip_ratio/low_min": 0.0003195867648173589, "clip_ratio/region_mean": 0.002933876901806798, "epoch": 0.05730652340035817, "grad_norm": 0.18571637570858002, "learning_rate": 1e-06, "loss": 0.0513, "step": 614 }, { "clip_ratio/high_max": 0.003133909725875128, "clip_ratio/high_mean": 0.0012686830505117541, "clip_ratio/low_mean": 0.001598485014255857, "clip_ratio/low_min": 0.00016843396952026524, "clip_ratio/region_mean": 0.002867168055672664, "epoch": 0.05739985650035875, "grad_norm": 0.10513711720705032, "learning_rate": 1e-06, "loss": -0.0017, "step": 615 }, { "clip_ratio/high_max": 0.0030570370799978264, "clip_ratio/high_mean": 0.0013293832744238898, "clip_ratio/low_mean": 0.0013120000185153913, "clip_ratio/low_min": 9.725565905682743e-05, "clip_ratio/region_mean": 0.0026413833402330056, "epoch": 0.05749318960035933, "grad_norm": 0.12293823063373566, "learning_rate": 1e-06, "loss": -0.0123, "step": 616 }, { "clip_ratio/high_max": 0.003299827942100819, "clip_ratio/high_mean": 0.0014612694867537357, "clip_ratio/low_mean": 0.0013754075043834746, "clip_ratio/low_min": 0.00019858918312820606, "clip_ratio/region_mean": 0.0028366769838612527, "epoch": 0.05758652270035992, "grad_norm": 0.11497383564710617, "learning_rate": 1e-06, "loss": 0.0078, "step": 617 }, { "clip_ratio/high_max": 0.0031383688692585565, "clip_ratio/high_mean": 0.0013155838314560242, "clip_ratio/low_mean": 0.0015653529044357128, "clip_ratio/low_min": 0.0001314942483077175, "clip_ratio/region_mean": 0.0028809367067879066, "epoch": 0.0576798558003605, "grad_norm": 0.10731995105743408, "learning_rate": 1e-06, "loss": 0.0328, "step": 618 }, { "clip_ratio/high_max": 0.0026809280388988554, "clip_ratio/high_mean": 0.0012080861488357186, "clip_ratio/low_mean": 0.0019553159745555604, "clip_ratio/low_min": 0.00034075777512043715, "clip_ratio/region_mean": 0.0031634021288482472, "epoch": 0.05777318890036108, "grad_norm": 0.11680079251527786, "learning_rate": 1e-06, "loss": 0.018, "step": 619 }, { "clip_ratio/high_max": 0.0026415278334752657, "clip_ratio/high_mean": 0.001101141799153993, "clip_ratio/low_mean": 0.0017012868120218627, "clip_ratio/low_min": 0.00011531197924341541, "clip_ratio/region_mean": 0.0028024285420542583, "epoch": 0.057866522000361666, "grad_norm": 0.10773280262947083, "learning_rate": 1e-06, "loss": 0.0477, "step": 620 }, { "clip_ratio/high_max": 0.002566681148891803, "clip_ratio/high_mean": 0.0011477045100036776, "clip_ratio/low_mean": 0.0018021069336100481, "clip_ratio/low_min": 0.0002992164900206262, "clip_ratio/region_mean": 0.002949811481812503, "epoch": 0.05795985510036225, "grad_norm": 0.10499243438243866, "learning_rate": 1e-06, "loss": 0.0322, "step": 621 }, { "clip_ratio/high_max": 0.003188927636074368, "clip_ratio/high_mean": 0.001448851136956364, "clip_ratio/low_mean": 0.0015544437646894949, "clip_ratio/low_min": 0.0001414512289557024, "clip_ratio/region_mean": 0.003003294885274954, "epoch": 0.05805318820036283, "grad_norm": 0.12040673941373825, "learning_rate": 1e-06, "loss": -0.0306, "step": 622 }, { "clip_ratio/high_max": 0.0033192904229508713, "clip_ratio/high_mean": 0.0013392424698395189, "clip_ratio/low_mean": 0.0015561041727778502, "clip_ratio/low_min": 0.00020276426039345097, "clip_ratio/region_mean": 0.0028953466389793903, "epoch": 0.058146521300363414, "grad_norm": 0.13179415464401245, "learning_rate": 1e-06, "loss": 0.0449, "step": 623 }, { "clip_ratio/high_max": 0.003174079567543231, "clip_ratio/high_mean": 0.0012380525295156986, "clip_ratio/low_mean": 0.0016126817208714783, "clip_ratio/low_min": 0.00027876444983121473, "clip_ratio/region_mean": 0.0028507342212833464, "epoch": 0.058239854400364, "grad_norm": 0.11259009689092636, "learning_rate": 1e-06, "loss": 0.0643, "step": 624 }, { "clip_ratio/high_max": 0.0030925749597372487, "clip_ratio/high_mean": 0.0011946113481826615, "clip_ratio/low_mean": 0.001656367712712381, "clip_ratio/low_min": 8.057512241066433e-05, "clip_ratio/region_mean": 0.002850979071808979, "epoch": 0.058333187500364585, "grad_norm": 0.11597687751054764, "learning_rate": 1e-06, "loss": 0.0747, "step": 625 }, { "clip_ratio/high_max": 0.0028178526554256678, "clip_ratio/high_mean": 0.0012578428359120153, "clip_ratio/low_mean": 0.0013812756678817095, "clip_ratio/low_min": 0.00013082805708108936, "clip_ratio/region_mean": 0.0026391184655949473, "epoch": 0.05842652060036516, "grad_norm": 0.1210600957274437, "learning_rate": 1e-06, "loss": 0.0121, "step": 626 }, { "clip_ratio/high_max": 0.003386612137546763, "clip_ratio/high_mean": 0.0014514080285152886, "clip_ratio/low_mean": 0.001366763244732283, "clip_ratio/low_min": 0.00011221376189496368, "clip_ratio/region_mean": 0.0028181711677461863, "epoch": 0.05851985370036575, "grad_norm": 0.11617657542228699, "learning_rate": 1e-06, "loss": -0.0077, "step": 627 }, { "clip_ratio/high_max": 0.0034576827310957015, "clip_ratio/high_mean": 0.001391749014146626, "clip_ratio/low_mean": 0.0015356980948126875, "clip_ratio/low_min": 0.00013342677448235918, "clip_ratio/region_mean": 0.002927447007095907, "epoch": 0.05861318680036633, "grad_norm": 0.11794304847717285, "learning_rate": 1e-06, "loss": 0.0147, "step": 628 }, { "clip_ratio/high_max": 0.0028020602476317436, "clip_ratio/high_mean": 0.0012487415315263206, "clip_ratio/low_mean": 0.0014458097030001227, "clip_ratio/low_min": 0.00011995400018349756, "clip_ratio/region_mean": 0.0026945512436213903, "epoch": 0.05870651990036692, "grad_norm": 0.10536064207553864, "learning_rate": 1e-06, "loss": 0.0432, "step": 629 }, { "clip_ratio/high_max": 0.00295761782763293, "clip_ratio/high_mean": 0.0012614783117896877, "clip_ratio/low_mean": 0.0015288841859728564, "clip_ratio/low_min": 9.996736662287731e-05, "clip_ratio/region_mean": 0.0027903624868486077, "epoch": 0.0587998530003675, "grad_norm": 0.1187087669968605, "learning_rate": 1e-06, "loss": 0.0319, "step": 630 }, { "clip_ratio/high_max": 0.0033629569443291984, "clip_ratio/high_mean": 0.0014393862220458686, "clip_ratio/low_mean": 0.001504298510553781, "clip_ratio/low_min": 9.758964733919129e-05, "clip_ratio/region_mean": 0.002943684739875607, "epoch": 0.05889318610036808, "grad_norm": 0.11731305718421936, "learning_rate": 1e-06, "loss": 0.0368, "step": 631 }, { "clip_ratio/high_max": 0.0025362236992805265, "clip_ratio/high_mean": 0.0011733405772247352, "clip_ratio/low_mean": 0.0014804913189436775, "clip_ratio/low_min": 6.86692173985648e-05, "clip_ratio/region_mean": 0.002653831907082349, "epoch": 0.05898651920036867, "grad_norm": 0.11169592291116714, "learning_rate": 1e-06, "loss": 0.039, "step": 632 }, { "clip_ratio/high_max": 0.002341818224522285, "clip_ratio/high_mean": 0.001009324987535365, "clip_ratio/low_mean": 0.0016309768834616989, "clip_ratio/low_min": 0.00016387016694352496, "clip_ratio/region_mean": 0.0026403017691336572, "epoch": 0.05907985230036925, "grad_norm": 0.10973479598760605, "learning_rate": 1e-06, "loss": 0.0802, "step": 633 }, { "clip_ratio/high_max": 0.003161839136737399, "clip_ratio/high_mean": 0.001390608878864441, "clip_ratio/low_mean": 0.001491869319579564, "clip_ratio/low_min": 0.00013183088321966352, "clip_ratio/region_mean": 0.0028824782057199627, "epoch": 0.05917318540036983, "grad_norm": 0.10946914553642273, "learning_rate": 1e-06, "loss": 0.0234, "step": 634 }, { "clip_ratio/high_max": 0.002981438963615801, "clip_ratio/high_mean": 0.0012554377171909437, "clip_ratio/low_mean": 0.001528946559119504, "clip_ratio/low_min": 0.00020870682055829093, "clip_ratio/region_mean": 0.0027843843126902357, "epoch": 0.059266518500370416, "grad_norm": 0.11729389429092407, "learning_rate": 1e-06, "loss": 0.0103, "step": 635 }, { "clip_ratio/high_max": 0.0028081943601137027, "clip_ratio/high_mean": 0.0012507840474427212, "clip_ratio/low_mean": 0.0015382885794679169, "clip_ratio/low_min": 0.0002337202031412744, "clip_ratio/region_mean": 0.0027890726341865957, "epoch": 0.059359851600371, "grad_norm": 0.12659451365470886, "learning_rate": 1e-06, "loss": 0.0091, "step": 636 }, { "clip_ratio/high_max": 0.0027931696458836086, "clip_ratio/high_mean": 0.00120272222920903, "clip_ratio/low_mean": 0.0017151880128949415, "clip_ratio/low_min": 4.1624176446930505e-05, "clip_ratio/region_mean": 0.0029179101838963106, "epoch": 0.05945318470037158, "grad_norm": 0.11294504255056381, "learning_rate": 1e-06, "loss": 0.0289, "step": 637 }, { "clip_ratio/high_max": 0.002893213117204141, "clip_ratio/high_mean": 0.0010985886874550488, "clip_ratio/low_mean": 0.001700567481748294, "clip_ratio/low_min": 0.0001591967975400621, "clip_ratio/region_mean": 0.002799156151013449, "epoch": 0.059546517800372165, "grad_norm": 0.1137283593416214, "learning_rate": 1e-06, "loss": 0.0446, "step": 638 }, { "clip_ratio/high_max": 0.003044217679416761, "clip_ratio/high_mean": 0.0012996570476389024, "clip_ratio/low_mean": 0.0014075102044444066, "clip_ratio/low_min": 9.399888313055271e-05, "clip_ratio/region_mean": 0.002707167252083309, "epoch": 0.05963985090037275, "grad_norm": 0.11308509111404419, "learning_rate": 1e-06, "loss": -0.0169, "step": 639 }, { "clip_ratio/high_max": 0.002678591539734043, "clip_ratio/high_mean": 0.0012060598564858083, "clip_ratio/low_mean": 0.0017007987589749973, "clip_ratio/low_min": 0.00013706863319384865, "clip_ratio/region_mean": 0.0029068586736684665, "epoch": 0.059733184000373335, "grad_norm": 0.11297818273305893, "learning_rate": 1e-06, "loss": 0.041, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012058803013392905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 609.9797973632812, "completions/mean_terminated_length": 567.429443359375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.05982651710037391, "grad_norm": 0.1292581856250763, "learning_rate": 1e-06, "loss": 0.0193, "num_tokens": 485893065.0, "reward": 0.5891374945640564, "reward_std": 0.19757407903671265, "rewards/simpleverify_reward/mean": 0.5891374945640564, "rewards/simpleverify_reward/std": 0.4919925034046173, "step": 641 }, { "clip_ratio/high_max": 0.0021748554172518197, "clip_ratio/high_mean": 0.0009954512443073327, "clip_ratio/low_mean": 0.0005990892204863485, "clip_ratio/low_min": 1.7090511391870677e-05, "clip_ratio/region_mean": 0.0015945403974910732, "epoch": 0.0599198502003745, "grad_norm": 0.1223272904753685, "learning_rate": 1e-06, "loss": 0.017, "step": 642 }, { "clip_ratio/high_max": 0.0018065920812659897, "clip_ratio/high_mean": 0.0008048314630286768, "clip_ratio/low_mean": 0.0005610977905234904, "clip_ratio/low_min": 4.243129205860896e-05, "clip_ratio/region_mean": 0.0013659292217198526, "epoch": 0.060013183300375084, "grad_norm": 0.12602369487285614, "learning_rate": 1e-06, "loss": 0.0023, "step": 643 }, { "clip_ratio/high_max": 0.0020949177342117764, "clip_ratio/high_mean": 0.0009120941485889489, "clip_ratio/low_mean": 0.0006879099582874915, "clip_ratio/low_min": 6.950073020561831e-05, "clip_ratio/region_mean": 0.0016000041214283556, "epoch": 0.06010651640037567, "grad_norm": 0.11360519379377365, "learning_rate": 1e-06, "loss": 0.0057, "step": 644 }, { "clip_ratio/high_max": 0.0018249375534651335, "clip_ratio/high_mean": 0.0008150199009833159, "clip_ratio/low_mean": 0.0006981166598052368, "clip_ratio/low_min": 3.552807447704254e-05, "clip_ratio/region_mean": 0.001513136532594217, "epoch": 0.06019984950037625, "grad_norm": 0.1097647026181221, "learning_rate": 1e-06, "loss": 0.0, "step": 645 }, { "clip_ratio/high_max": 0.0020257036412658636, "clip_ratio/high_mean": 0.0008049288662732579, "clip_ratio/low_mean": 0.0008355631180165801, "clip_ratio/low_min": 5.522824176296126e-05, "clip_ratio/region_mean": 0.0016404919952037744, "epoch": 0.06029318260037683, "grad_norm": 0.12153657525777817, "learning_rate": 1e-06, "loss": 0.0226, "step": 646 }, { "clip_ratio/high_max": 0.002552907095378032, "clip_ratio/high_mean": 0.0009890525543596596, "clip_ratio/low_mean": 0.000778659514253377, "clip_ratio/low_min": 2.18493278225651e-05, "clip_ratio/region_mean": 0.0017677120013104286, "epoch": 0.06038651570037742, "grad_norm": 0.47265344858169556, "learning_rate": 1e-06, "loss": 0.0194, "step": 647 }, { "clip_ratio/high_max": 0.002179434915888123, "clip_ratio/high_mean": 0.0009403069689142285, "clip_ratio/low_mean": 0.0008266525037470274, "clip_ratio/low_min": 3.832214861176908e-05, "clip_ratio/region_mean": 0.0017669594817562029, "epoch": 0.060479848800377996, "grad_norm": 0.1060282364487648, "learning_rate": 1e-06, "loss": -0.0001, "step": 648 }, { "clip_ratio/high_max": 0.0019388398286537267, "clip_ratio/high_mean": 0.0008207587234210223, "clip_ratio/low_mean": 0.0008359317253052723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016566904741921462, "epoch": 0.06057318190037858, "grad_norm": 0.11346718668937683, "learning_rate": 1e-06, "loss": 0.0049, "step": 649 }, { "clip_ratio/high_max": 0.0021642236315528862, "clip_ratio/high_mean": 0.0008643663350085262, "clip_ratio/low_mean": 0.0009825911620282568, "clip_ratio/low_min": 0.00012871516446466558, "clip_ratio/region_mean": 0.0018469574642949738, "epoch": 0.060666515000379166, "grad_norm": 0.10506804287433624, "learning_rate": 1e-06, "loss": 0.0366, "step": 650 }, { "clip_ratio/high_max": 0.0018231422181997914, "clip_ratio/high_mean": 0.0008435171821474796, "clip_ratio/low_mean": 0.001088205670384923, "clip_ratio/low_min": 0.00010629372263792902, "clip_ratio/region_mean": 0.0019317228143336251, "epoch": 0.06075984810037975, "grad_norm": 0.1282629817724228, "learning_rate": 1e-06, "loss": 0.0514, "step": 651 }, { "clip_ratio/high_max": 0.0021607658491120674, "clip_ratio/high_mean": 0.0008906453622330446, "clip_ratio/low_mean": 0.0011829749691969482, "clip_ratio/low_min": 0.00023498674363509053, "clip_ratio/region_mean": 0.002073620315059088, "epoch": 0.06085318120038033, "grad_norm": 0.10715054720640182, "learning_rate": 1e-06, "loss": 0.0603, "step": 652 }, { "clip_ratio/high_max": 0.002410183828033041, "clip_ratio/high_mean": 0.0009837968354986515, "clip_ratio/low_mean": 0.0009434997627977282, "clip_ratio/low_min": 6.215477696969174e-05, "clip_ratio/region_mean": 0.0019272965437266976, "epoch": 0.060946514300380915, "grad_norm": 0.11937594413757324, "learning_rate": 1e-06, "loss": 0.0328, "step": 653 }, { "clip_ratio/high_max": 0.0024948293867055327, "clip_ratio/high_mean": 0.0010107441357831703, "clip_ratio/low_mean": 0.0009076120550162159, "clip_ratio/low_min": 1.8241353245684877e-05, "clip_ratio/region_mean": 0.0019183562108082697, "epoch": 0.0610398474003815, "grad_norm": 0.12305525690317154, "learning_rate": 1e-06, "loss": 0.0097, "step": 654 }, { "clip_ratio/high_max": 0.002373426097619813, "clip_ratio/high_mean": 0.0010299016303179087, "clip_ratio/low_mean": 0.0009326153976871865, "clip_ratio/low_min": 6.017443956807256e-05, "clip_ratio/region_mean": 0.0019625170461949892, "epoch": 0.061133180500382085, "grad_norm": 0.11715622991323471, "learning_rate": 1e-06, "loss": 0.0078, "step": 655 }, { "clip_ratio/high_max": 0.001954997111170087, "clip_ratio/high_mean": 0.0009834772390604485, "clip_ratio/low_mean": 0.0009611216191842686, "clip_ratio/low_min": 9.363302615383873e-05, "clip_ratio/region_mean": 0.0019445988873485476, "epoch": 0.061226513600382663, "grad_norm": 0.11515656858682632, "learning_rate": 1e-06, "loss": 0.0282, "step": 656 }, { "clip_ratio/high_max": 0.002431402994261589, "clip_ratio/high_mean": 0.0010576932418189244, "clip_ratio/low_mean": 0.0010466949352121446, "clip_ratio/low_min": 0.00013691628373635467, "clip_ratio/region_mean": 0.0021043881788500585, "epoch": 0.06131984670038325, "grad_norm": 0.12049777060747147, "learning_rate": 1e-06, "loss": 0.0106, "step": 657 }, { "clip_ratio/high_max": 0.002144788690202404, "clip_ratio/high_mean": 0.001019918498059269, "clip_ratio/low_mean": 0.0009519484938209644, "clip_ratio/low_min": 0.00016175894779735245, "clip_ratio/region_mean": 0.001971866993699223, "epoch": 0.061413179800383834, "grad_norm": 0.11356595158576965, "learning_rate": 1e-06, "loss": 0.007, "step": 658 }, { "clip_ratio/high_max": 0.0021800879621878266, "clip_ratio/high_mean": 0.0009550613358442206, "clip_ratio/low_mean": 0.0010843024974747095, "clip_ratio/low_min": 9.379323364555603e-05, "clip_ratio/region_mean": 0.002039363869698718, "epoch": 0.06150651290038442, "grad_norm": 0.12183915823698044, "learning_rate": 1e-06, "loss": 0.029, "step": 659 }, { "clip_ratio/high_max": 0.0025504031509626657, "clip_ratio/high_mean": 0.00108420853575808, "clip_ratio/low_mean": 0.0011944966045120964, "clip_ratio/low_min": 0.00013418217895377893, "clip_ratio/region_mean": 0.0022787051493651234, "epoch": 0.061599846000385, "grad_norm": 0.1095348596572876, "learning_rate": 1e-06, "loss": 0.0127, "step": 660 }, { "clip_ratio/high_max": 0.002132608959072968, "clip_ratio/high_mean": 0.0008687532008480048, "clip_ratio/low_mean": 0.0011514902544149663, "clip_ratio/low_min": 0.00012971089927305002, "clip_ratio/region_mean": 0.0020202434752718545, "epoch": 0.06169317910038558, "grad_norm": 0.12734737992286682, "learning_rate": 1e-06, "loss": 0.0849, "step": 661 }, { "clip_ratio/high_max": 0.0027423021601862274, "clip_ratio/high_mean": 0.0011134156411571894, "clip_ratio/low_mean": 0.0010306433559890138, "clip_ratio/low_min": 3.0472036087303422e-05, "clip_ratio/region_mean": 0.002144059028069023, "epoch": 0.06178651220038617, "grad_norm": 0.11798778921365738, "learning_rate": 1e-06, "loss": 0.0016, "step": 662 }, { "clip_ratio/high_max": 0.002260082990687806, "clip_ratio/high_mean": 0.0010156289717997424, "clip_ratio/low_mean": 0.0011134239121020073, "clip_ratio/low_min": 0.00022802861167292576, "clip_ratio/region_mean": 0.0021290528820827603, "epoch": 0.061879845300386746, "grad_norm": 0.1136472076177597, "learning_rate": 1e-06, "loss": 0.0168, "step": 663 }, { "clip_ratio/high_max": 0.002391207504842896, "clip_ratio/high_mean": 0.0009677176240074914, "clip_ratio/low_mean": 0.0009122883857344277, "clip_ratio/low_min": 6.210313949850388e-05, "clip_ratio/region_mean": 0.0018800059879140463, "epoch": 0.06197317840038733, "grad_norm": 0.11377140879631042, "learning_rate": 1e-06, "loss": 0.0243, "step": 664 }, { "clip_ratio/high_max": 0.0024142058900906704, "clip_ratio/high_mean": 0.0009860117061180063, "clip_ratio/low_mean": 0.0010925081669483916, "clip_ratio/low_min": 0.00016608188343525399, "clip_ratio/region_mean": 0.0020785199085366912, "epoch": 0.062066511500387916, "grad_norm": 0.1222008764743805, "learning_rate": 1e-06, "loss": 0.0258, "step": 665 }, { "clip_ratio/high_max": 0.00221043895726325, "clip_ratio/high_mean": 0.0009247103316738503, "clip_ratio/low_mean": 0.0010705184067774098, "clip_ratio/low_min": 0.00015207652086246526, "clip_ratio/region_mean": 0.0019952287548221648, "epoch": 0.0621598446003885, "grad_norm": 0.11470475047826767, "learning_rate": 1e-06, "loss": 0.0706, "step": 666 }, { "clip_ratio/high_max": 0.002046182482445147, "clip_ratio/high_mean": 0.0010548308746365365, "clip_ratio/low_mean": 0.0011983355962001951, "clip_ratio/low_min": 0.00011185111543454695, "clip_ratio/region_mean": 0.0022531664217240177, "epoch": 0.06225317770038908, "grad_norm": 0.11831323802471161, "learning_rate": 1e-06, "loss": 0.0183, "step": 667 }, { "clip_ratio/high_max": 0.0023645868386665825, "clip_ratio/high_mean": 0.0009916117160173599, "clip_ratio/low_mean": 0.0012133550735597964, "clip_ratio/low_min": 0.00011389496103220154, "clip_ratio/region_mean": 0.0022049667895771563, "epoch": 0.062346510800389665, "grad_norm": 0.12757113575935364, "learning_rate": 1e-06, "loss": 0.0364, "step": 668 }, { "clip_ratio/high_max": 0.0024345343408640474, "clip_ratio/high_mean": 0.0011065738308388973, "clip_ratio/low_mean": 0.0011969369097641902, "clip_ratio/low_min": 4.826254735235125e-05, "clip_ratio/region_mean": 0.002303510751517024, "epoch": 0.06243984390039025, "grad_norm": 0.12438348680734634, "learning_rate": 1e-06, "loss": 0.0029, "step": 669 }, { "clip_ratio/high_max": 0.0019871227705152705, "clip_ratio/high_mean": 0.0008757966425037012, "clip_ratio/low_mean": 0.00103066617884906, "clip_ratio/low_min": 5.639658684231108e-05, "clip_ratio/region_mean": 0.0019064628504565917, "epoch": 0.06253317700039084, "grad_norm": 0.11792006343603134, "learning_rate": 1e-06, "loss": -0.0008, "step": 670 }, { "clip_ratio/high_max": 0.0023602003675478045, "clip_ratio/high_mean": 0.001051755029038759, "clip_ratio/low_mean": 0.0012430751412466634, "clip_ratio/low_min": 0.00018060886031889822, "clip_ratio/region_mean": 0.0022948301339056343, "epoch": 0.06262651010039141, "grad_norm": 0.11470434069633484, "learning_rate": 1e-06, "loss": -0.0023, "step": 671 }, { "clip_ratio/high_max": 0.00284223556809593, "clip_ratio/high_mean": 0.0012327261283644475, "clip_ratio/low_mean": 0.0010937725273834076, "clip_ratio/low_min": 0.00011392452324798796, "clip_ratio/region_mean": 0.0023264986521098763, "epoch": 0.062719843200392, "grad_norm": 0.12206578999757767, "learning_rate": 1e-06, "loss": -0.0226, "step": 672 }, { "clip_ratio/high_max": 0.002285585542267654, "clip_ratio/high_mean": 0.0010097148151544388, "clip_ratio/low_mean": 0.0011519285435497295, "clip_ratio/low_min": 0.00012479134875320597, "clip_ratio/region_mean": 0.0021616434169118293, "epoch": 0.06281317630039258, "grad_norm": 0.11518562585115433, "learning_rate": 1e-06, "loss": 0.0236, "step": 673 }, { "clip_ratio/high_max": 0.0021758834627689794, "clip_ratio/high_mean": 0.0009735883795656264, "clip_ratio/low_mean": 0.0011830461517092772, "clip_ratio/low_min": 6.17078912910074e-05, "clip_ratio/region_mean": 0.0021566345749306493, "epoch": 0.06290650940039316, "grad_norm": 0.1052321121096611, "learning_rate": 1e-06, "loss": 0.0373, "step": 674 }, { "clip_ratio/high_max": 0.002513353480026126, "clip_ratio/high_mean": 0.0010438256676934543, "clip_ratio/low_mean": 0.0013691830754396506, "clip_ratio/low_min": 0.00015871398773015244, "clip_ratio/region_mean": 0.0024130087695084512, "epoch": 0.06299984250039375, "grad_norm": 0.12379326671361923, "learning_rate": 1e-06, "loss": 0.0563, "step": 675 }, { "clip_ratio/high_max": 0.002718938012549188, "clip_ratio/high_mean": 0.001030139610520564, "clip_ratio/low_mean": 0.0012635992534342222, "clip_ratio/low_min": 0.0001264313450519694, "clip_ratio/region_mean": 0.002293738885782659, "epoch": 0.06309317560039433, "grad_norm": 0.12343112379312515, "learning_rate": 1e-06, "loss": 0.0314, "step": 676 }, { "clip_ratio/high_max": 0.0025925431109499186, "clip_ratio/high_mean": 0.001092134727514349, "clip_ratio/low_mean": 0.0010619158092595171, "clip_ratio/low_min": 3.811750502791256e-05, "clip_ratio/region_mean": 0.0021540505331358872, "epoch": 0.06318650870039491, "grad_norm": 0.11260983347892761, "learning_rate": 1e-06, "loss": 0.009, "step": 677 }, { "clip_ratio/high_max": 0.003240023215766996, "clip_ratio/high_mean": 0.0012721459461317863, "clip_ratio/low_mean": 0.0010284298605256481, "clip_ratio/low_min": 5.104638239572523e-05, "clip_ratio/region_mean": 0.0023005758557701483, "epoch": 0.0632798418003955, "grad_norm": 0.12015233933925629, "learning_rate": 1e-06, "loss": -0.012, "step": 678 }, { "clip_ratio/high_max": 0.0026589845947455615, "clip_ratio/high_mean": 0.0011535403646121267, "clip_ratio/low_mean": 0.0010735983196354937, "clip_ratio/low_min": 0.00011731040285667405, "clip_ratio/region_mean": 0.0022271386624197476, "epoch": 0.06337317490039608, "grad_norm": 0.12267571687698364, "learning_rate": 1e-06, "loss": 0.0142, "step": 679 }, { "clip_ratio/high_max": 0.0025322256187791936, "clip_ratio/high_mean": 0.0010675332887331024, "clip_ratio/low_mean": 0.0011451830541773234, "clip_ratio/low_min": 0.00015298763446480734, "clip_ratio/region_mean": 0.0022127163465484045, "epoch": 0.06346650800039666, "grad_norm": 0.3585658073425293, "learning_rate": 1e-06, "loss": 0.0511, "step": 680 }, { "clip_ratio/high_max": 0.0023519998358096927, "clip_ratio/high_mean": 0.0009954732122423593, "clip_ratio/low_mean": 0.0012203052538097836, "clip_ratio/low_min": 0.00014456269491347484, "clip_ratio/region_mean": 0.002215778462414164, "epoch": 0.06355984110039725, "grad_norm": 0.20242321491241455, "learning_rate": 1e-06, "loss": 0.0491, "step": 681 }, { "clip_ratio/high_max": 0.002439431962557137, "clip_ratio/high_mean": 0.0009475969072809676, "clip_ratio/low_mean": 0.0013035246010986157, "clip_ratio/low_min": 0.00019228864766773768, "clip_ratio/region_mean": 0.0022511215356644243, "epoch": 0.06365317420039783, "grad_norm": 0.10807636380195618, "learning_rate": 1e-06, "loss": 0.0789, "step": 682 }, { "clip_ratio/high_max": 0.002316114077984821, "clip_ratio/high_mean": 0.0009486514791205991, "clip_ratio/low_mean": 0.0012019726309517864, "clip_ratio/low_min": 0.0001298677152590244, "clip_ratio/region_mean": 0.0021506240955204703, "epoch": 0.06374650730039842, "grad_norm": 0.1070273295044899, "learning_rate": 1e-06, "loss": 0.0298, "step": 683 }, { "clip_ratio/high_max": 0.002226147538749501, "clip_ratio/high_mean": 0.0010514173700357787, "clip_ratio/low_mean": 0.0012112994663766585, "clip_ratio/low_min": 9.296199641539715e-05, "clip_ratio/region_mean": 0.0022627168364124373, "epoch": 0.063839840400399, "grad_norm": 0.11416471749544144, "learning_rate": 1e-06, "loss": 0.0339, "step": 684 }, { "clip_ratio/high_max": 0.0025248121601180173, "clip_ratio/high_mean": 0.0010145499363716226, "clip_ratio/low_mean": 0.0012123338365199743, "clip_ratio/low_min": 7.565199302916881e-05, "clip_ratio/region_mean": 0.0022268837565206923, "epoch": 0.06393317350039958, "grad_norm": 0.11256154626607895, "learning_rate": 1e-06, "loss": 0.0129, "step": 685 }, { "clip_ratio/high_max": 0.002097346165101044, "clip_ratio/high_mean": 0.0009084048760996666, "clip_ratio/low_mean": 0.0011575487078516744, "clip_ratio/low_min": 1.950686601048801e-05, "clip_ratio/region_mean": 0.002065953565761447, "epoch": 0.06402650660040017, "grad_norm": 0.11213254183530807, "learning_rate": 1e-06, "loss": 0.0465, "step": 686 }, { "clip_ratio/high_max": 0.002256944280816242, "clip_ratio/high_mean": 0.0010432040271552978, "clip_ratio/low_mean": 0.0012024112547805998, "clip_ratio/low_min": 0.0002010854204854695, "clip_ratio/region_mean": 0.002245615287392866, "epoch": 0.06411983970040075, "grad_norm": 0.11101026833057404, "learning_rate": 1e-06, "loss": 0.0416, "step": 687 }, { "clip_ratio/high_max": 0.002001195040065795, "clip_ratio/high_mean": 0.0009077958966372535, "clip_ratio/low_mean": 0.0011917410647583893, "clip_ratio/low_min": 0.0001307397496930207, "clip_ratio/region_mean": 0.0020995369923184626, "epoch": 0.06421317280040133, "grad_norm": 0.10993636399507523, "learning_rate": 1e-06, "loss": 0.0459, "step": 688 }, { "clip_ratio/high_max": 0.0030557825666619465, "clip_ratio/high_mean": 0.0010894620136241429, "clip_ratio/low_mean": 0.0010890003431995865, "clip_ratio/low_min": 7.668862235732377e-05, "clip_ratio/region_mean": 0.0021784623531857505, "epoch": 0.06430650590040192, "grad_norm": 0.11251064389944077, "learning_rate": 1e-06, "loss": 0.0068, "step": 689 }, { "clip_ratio/high_max": 0.0025424556006328203, "clip_ratio/high_mean": 0.0010000574275181862, "clip_ratio/low_mean": 0.0013550070289056748, "clip_ratio/low_min": 0.00015537701074208599, "clip_ratio/region_mean": 0.0023550644400529563, "epoch": 0.0643998390004025, "grad_norm": 0.11436987668275833, "learning_rate": 1e-06, "loss": 0.046, "step": 690 }, { "clip_ratio/high_max": 0.0024922296215663664, "clip_ratio/high_mean": 0.0011286482476862147, "clip_ratio/low_mean": 0.0011835709301522002, "clip_ratio/low_min": 0.00011363453631929588, "clip_ratio/region_mean": 0.002312219097802881, "epoch": 0.06449317210040308, "grad_norm": 0.1161859780550003, "learning_rate": 1e-06, "loss": 0.0096, "step": 691 }, { "clip_ratio/high_max": 0.00299485755385831, "clip_ratio/high_mean": 0.0012416012050380232, "clip_ratio/low_mean": 0.001249080705747474, "clip_ratio/low_min": 0.00010149267836823128, "clip_ratio/region_mean": 0.0024906818798626773, "epoch": 0.06458650520040367, "grad_norm": 0.12285619229078293, "learning_rate": 1e-06, "loss": 0.0326, "step": 692 }, { "clip_ratio/high_max": 0.0027557476132642478, "clip_ratio/high_mean": 0.0010278811569151003, "clip_ratio/low_mean": 0.0012661598811973818, "clip_ratio/low_min": 3.7130223063286394e-05, "clip_ratio/region_mean": 0.0022940410635783337, "epoch": 0.06467983830040425, "grad_norm": 0.11306583136320114, "learning_rate": 1e-06, "loss": 0.0664, "step": 693 }, { "clip_ratio/high_max": 0.0024865824343578424, "clip_ratio/high_mean": 0.0010564833082753466, "clip_ratio/low_mean": 0.0011395050332794199, "clip_ratio/low_min": 8.61471016833093e-05, "clip_ratio/region_mean": 0.0021959883597446606, "epoch": 0.06477317140040484, "grad_norm": 0.10932648926973343, "learning_rate": 1e-06, "loss": 0.0481, "step": 694 }, { "clip_ratio/high_max": 0.0026135559819522314, "clip_ratio/high_mean": 0.0011820854015240911, "clip_ratio/low_mean": 0.0012931187702633906, "clip_ratio/low_min": 0.00024147951353370445, "clip_ratio/region_mean": 0.0024752041717874818, "epoch": 0.06486650450040542, "grad_norm": 0.12203668802976608, "learning_rate": 1e-06, "loss": 0.0139, "step": 695 }, { "clip_ratio/high_max": 0.0029835702225682326, "clip_ratio/high_mean": 0.0012344263232080266, "clip_ratio/low_mean": 0.0009928158397087827, "clip_ratio/low_min": 1.1378117960703094e-05, "clip_ratio/region_mean": 0.0022272421265370212, "epoch": 0.064959837600406, "grad_norm": 0.10823118686676025, "learning_rate": 1e-06, "loss": -0.0247, "step": 696 }, { "clip_ratio/high_max": 0.002486766898073256, "clip_ratio/high_mean": 0.001023224062009831, "clip_ratio/low_mean": 0.0012401856038195547, "clip_ratio/low_min": 0.00011983016520389356, "clip_ratio/region_mean": 0.0022634096385445446, "epoch": 0.06505317070040659, "grad_norm": 0.11041391640901566, "learning_rate": 1e-06, "loss": -0.0081, "step": 697 }, { "clip_ratio/high_max": 0.003181499705533497, "clip_ratio/high_mean": 0.001253966176591348, "clip_ratio/low_mean": 0.0012152980743849184, "clip_ratio/low_min": 0.0001974070673895767, "clip_ratio/region_mean": 0.002469264349201694, "epoch": 0.06514650380040717, "grad_norm": 0.12527211010456085, "learning_rate": 1e-06, "loss": 0.0065, "step": 698 }, { "clip_ratio/high_max": 0.0025787828853935935, "clip_ratio/high_mean": 0.0011685543322528247, "clip_ratio/low_mean": 0.0011808987892436562, "clip_ratio/low_min": 5.9521842558751814e-05, "clip_ratio/region_mean": 0.002349453112401534, "epoch": 0.06523983690040774, "grad_norm": 0.1265258938074112, "learning_rate": 1e-06, "loss": -0.005, "step": 699 }, { "clip_ratio/high_max": 0.0027186387960682623, "clip_ratio/high_mean": 0.0011066180850320961, "clip_ratio/low_mean": 0.0011967343016294762, "clip_ratio/low_min": 0.00011390815052436665, "clip_ratio/region_mean": 0.002303352397575509, "epoch": 0.06533317000040834, "grad_norm": 0.11757099628448486, "learning_rate": 1e-06, "loss": 0.0237, "step": 700 }, { "clip_ratio/high_max": 0.0026916003262158483, "clip_ratio/high_mean": 0.0011009733643732034, "clip_ratio/low_mean": 0.0010691498573578428, "clip_ratio/low_min": 4.0817776607582346e-05, "clip_ratio/region_mean": 0.0021701232544728555, "epoch": 0.06542650310040891, "grad_norm": 0.17064279317855835, "learning_rate": 1e-06, "loss": -0.0163, "step": 701 }, { "clip_ratio/high_max": 0.0029597807661048137, "clip_ratio/high_mean": 0.001204866810439853, "clip_ratio/low_mean": 0.0011565085005713627, "clip_ratio/low_min": 0.00011697697118506767, "clip_ratio/region_mean": 0.0023613753364770673, "epoch": 0.06551983620040949, "grad_norm": 0.11354461312294006, "learning_rate": 1e-06, "loss": 0.0107, "step": 702 }, { "clip_ratio/high_max": 0.002540388082707068, "clip_ratio/high_mean": 0.0011217966948606772, "clip_ratio/low_mean": 0.0011787978692154866, "clip_ratio/low_min": 0.00022767904556530993, "clip_ratio/region_mean": 0.002300594547705259, "epoch": 0.06561316930041008, "grad_norm": 0.10583575069904327, "learning_rate": 1e-06, "loss": 0.025, "step": 703 }, { "clip_ratio/high_max": 0.0025225327408406883, "clip_ratio/high_mean": 0.0012109802883060183, "clip_ratio/low_mean": 0.0012246007536305115, "clip_ratio/low_min": 0.00024415691314061405, "clip_ratio/region_mean": 0.0024355810237466358, "epoch": 0.06570650240041066, "grad_norm": 0.11225082725286484, "learning_rate": 1e-06, "loss": 0.0502, "step": 704 }, { "clip_ratio/high_max": 0.0025045224465429783, "clip_ratio/high_mean": 0.0011060026445193216, "clip_ratio/low_mean": 0.001219853980728658, "clip_ratio/low_min": 0.00011860257745865965, "clip_ratio/region_mean": 0.0023258565852302127, "epoch": 0.06579983550041126, "grad_norm": 0.12580445408821106, "learning_rate": 1e-06, "loss": 0.0599, "step": 705 }, { "clip_ratio/high_max": 0.0028461952388170175, "clip_ratio/high_mean": 0.0012014506664854707, "clip_ratio/low_mean": 0.001123272806580644, "clip_ratio/low_min": 9.10626758923172e-05, "clip_ratio/region_mean": 0.0023247234857990406, "epoch": 0.06589316860041183, "grad_norm": 0.11595622450113297, "learning_rate": 1e-06, "loss": 0.0131, "step": 706 }, { "clip_ratio/high_max": 0.0028481978151830845, "clip_ratio/high_mean": 0.0013104877689329442, "clip_ratio/low_mean": 0.0011574751588341314, "clip_ratio/low_min": 9.656323345552664e-05, "clip_ratio/region_mean": 0.002467962920491118, "epoch": 0.06598650170041241, "grad_norm": 0.1162097305059433, "learning_rate": 1e-06, "loss": 0.0064, "step": 707 }, { "clip_ratio/high_max": 0.0028246581423445605, "clip_ratio/high_mean": 0.0011492847515910398, "clip_ratio/low_mean": 0.0010771316101454431, "clip_ratio/low_min": 6.393207877408713e-05, "clip_ratio/region_mean": 0.002226416349003557, "epoch": 0.066079834800413, "grad_norm": 0.11277379095554352, "learning_rate": 1e-06, "loss": -0.0009, "step": 708 }, { "clip_ratio/high_max": 0.0027570801830734126, "clip_ratio/high_mean": 0.0011877820343215717, "clip_ratio/low_mean": 0.001361203430860769, "clip_ratio/low_min": 8.253211581177311e-05, "clip_ratio/region_mean": 0.0025489854597253725, "epoch": 0.06617316790041358, "grad_norm": 0.11838237196207047, "learning_rate": 1e-06, "loss": 0.0378, "step": 709 }, { "clip_ratio/high_max": 0.002480254028341733, "clip_ratio/high_mean": 0.0010067512557725422, "clip_ratio/low_mean": 0.0012594378385983873, "clip_ratio/low_min": 0.00012312238141021226, "clip_ratio/region_mean": 0.002266189141664654, "epoch": 0.06626650100041416, "grad_norm": 0.11159567534923553, "learning_rate": 1e-06, "loss": 0.0409, "step": 710 }, { "clip_ratio/high_max": 0.0028501835113274865, "clip_ratio/high_mean": 0.0011509106188896112, "clip_ratio/low_mean": 0.0011076549635617994, "clip_ratio/low_min": 7.029519838397391e-05, "clip_ratio/region_mean": 0.00225856555334758, "epoch": 0.06635983410041475, "grad_norm": 0.1090884730219841, "learning_rate": 1e-06, "loss": -0.019, "step": 711 }, { "clip_ratio/high_max": 0.0025015131250256673, "clip_ratio/high_mean": 0.0011309608998999465, "clip_ratio/low_mean": 0.0013818166880810168, "clip_ratio/low_min": 0.0001644355534153874, "clip_ratio/region_mean": 0.002512777646188624, "epoch": 0.06645316720041533, "grad_norm": 0.1104603111743927, "learning_rate": 1e-06, "loss": 0.0116, "step": 712 }, { "clip_ratio/high_max": 0.002846267518179957, "clip_ratio/high_mean": 0.001182878570034518, "clip_ratio/low_mean": 0.0012490217668528203, "clip_ratio/low_min": 0.0001320163682976272, "clip_ratio/region_mean": 0.002431900327792391, "epoch": 0.06654650030041591, "grad_norm": 0.11057287454605103, "learning_rate": 1e-06, "loss": 0.0394, "step": 713 }, { "clip_ratio/high_max": 0.0026766815717564896, "clip_ratio/high_mean": 0.0011324906154186465, "clip_ratio/low_mean": 0.001155380730779143, "clip_ratio/low_min": 0.00015753878324176185, "clip_ratio/region_mean": 0.0022878713498357683, "epoch": 0.0666398334004165, "grad_norm": 0.11118446290493011, "learning_rate": 1e-06, "loss": 0.0163, "step": 714 }, { "clip_ratio/high_max": 0.0028064691941835918, "clip_ratio/high_mean": 0.0011794291567639448, "clip_ratio/low_mean": 0.0013450968253891915, "clip_ratio/low_min": 7.189430152720888e-05, "clip_ratio/region_mean": 0.0025245259748771787, "epoch": 0.06673316650041708, "grad_norm": 0.10817889869213104, "learning_rate": 1e-06, "loss": 0.0139, "step": 715 }, { "clip_ratio/high_max": 0.002478754802723415, "clip_ratio/high_mean": 0.0010416226614324842, "clip_ratio/low_mean": 0.0014740600272489246, "clip_ratio/low_min": 0.00015510835419263458, "clip_ratio/region_mean": 0.0025156826595775783, "epoch": 0.06682649960041767, "grad_norm": 0.12320508062839508, "learning_rate": 1e-06, "loss": 0.0807, "step": 716 }, { "clip_ratio/high_max": 0.002839442102413159, "clip_ratio/high_mean": 0.0012444161147868726, "clip_ratio/low_mean": 0.0014227368628780823, "clip_ratio/low_min": 0.00010660336010914762, "clip_ratio/region_mean": 0.0026671530213207006, "epoch": 0.06691983270041825, "grad_norm": 0.10894238203763962, "learning_rate": 1e-06, "loss": 0.0101, "step": 717 }, { "clip_ratio/high_max": 0.0027487784682307392, "clip_ratio/high_mean": 0.0011286169828963466, "clip_ratio/low_mean": 0.001303982895478839, "clip_ratio/low_min": 0.00011971952881140169, "clip_ratio/region_mean": 0.002432599852909334, "epoch": 0.06701316580041883, "grad_norm": 0.11518412083387375, "learning_rate": 1e-06, "loss": 0.0245, "step": 718 }, { "clip_ratio/high_max": 0.002953848939796444, "clip_ratio/high_mean": 0.0012476090596464928, "clip_ratio/low_mean": 0.001168877261079615, "clip_ratio/low_min": 0.00022012464160070522, "clip_ratio/region_mean": 0.0024164863061741926, "epoch": 0.06710649890041942, "grad_norm": 0.10688501596450806, "learning_rate": 1e-06, "loss": 0.0023, "step": 719 }, { "clip_ratio/high_max": 0.002876053396903444, "clip_ratio/high_mean": 0.0012736888820654713, "clip_ratio/low_mean": 0.0011923532256332692, "clip_ratio/low_min": 1.4282450138125569e-05, "clip_ratio/region_mean": 0.002466042038577143, "epoch": 0.06719983200042, "grad_norm": 0.11770960688591003, "learning_rate": 1e-06, "loss": -0.001, "step": 720 }, { "clip_ratio/high_max": 0.00280849384580506, "clip_ratio/high_mean": 0.0012143038838985376, "clip_ratio/low_mean": 0.0013375899943639524, "clip_ratio/low_min": 0.0001574281122884713, "clip_ratio/region_mean": 0.0025518938491586596, "epoch": 0.06729316510042058, "grad_norm": 0.10870195180177689, "learning_rate": 1e-06, "loss": -0.0131, "step": 721 }, { "clip_ratio/high_max": 0.002300151107192505, "clip_ratio/high_mean": 0.0010804168778122403, "clip_ratio/low_mean": 0.0013466928285197355, "clip_ratio/low_min": 0.0001748389968270203, "clip_ratio/region_mean": 0.002427109742711764, "epoch": 0.06738649820042117, "grad_norm": 0.11212006211280823, "learning_rate": 1e-06, "loss": 0.0141, "step": 722 }, { "clip_ratio/high_max": 0.002081649723550072, "clip_ratio/high_mean": 0.0009708205252536573, "clip_ratio/low_mean": 0.0013232474211690715, "clip_ratio/low_min": 0.00024585640676377807, "clip_ratio/region_mean": 0.0022940679627936333, "epoch": 0.06747983130042175, "grad_norm": 0.10556382685899734, "learning_rate": 1e-06, "loss": 0.0628, "step": 723 }, { "clip_ratio/high_max": 0.002365284766710829, "clip_ratio/high_mean": 0.001005058957161964, "clip_ratio/low_mean": 0.0014573321132047568, "clip_ratio/low_min": 0.0002775772572931601, "clip_ratio/region_mean": 0.002462391057633795, "epoch": 0.06757316440042234, "grad_norm": 0.11068706959486008, "learning_rate": 1e-06, "loss": 0.0409, "step": 724 }, { "clip_ratio/high_max": 0.0025041052504093386, "clip_ratio/high_mean": 0.0011567464152903995, "clip_ratio/low_mean": 0.00125418507559516, "clip_ratio/low_min": 0.00010440914866194362, "clip_ratio/region_mean": 0.00241093151998939, "epoch": 0.06766649750042292, "grad_norm": 0.1133076548576355, "learning_rate": 1e-06, "loss": 0.0088, "step": 725 }, { "clip_ratio/high_max": 0.00271190805506194, "clip_ratio/high_mean": 0.0011754864372051088, "clip_ratio/low_mean": 0.0014167626213748008, "clip_ratio/low_min": 0.0001713991878204979, "clip_ratio/region_mean": 0.002592249002191238, "epoch": 0.0677598306004235, "grad_norm": 0.11904425173997879, "learning_rate": 1e-06, "loss": 0.0058, "step": 726 }, { "clip_ratio/high_max": 0.0030013936921022832, "clip_ratio/high_mean": 0.00122209042092436, "clip_ratio/low_mean": 0.0013235732239991194, "clip_ratio/low_min": 0.00020283639059925918, "clip_ratio/region_mean": 0.0025456636358285323, "epoch": 0.06785316370042409, "grad_norm": 0.12318696826696396, "learning_rate": 1e-06, "loss": 0.0225, "step": 727 }, { "clip_ratio/high_max": 0.0024411594931734726, "clip_ratio/high_mean": 0.001115340750402538, "clip_ratio/low_mean": 0.001319159367994871, "clip_ratio/low_min": 0.00010093897981278133, "clip_ratio/region_mean": 0.0024345001074834727, "epoch": 0.06794649680042467, "grad_norm": 0.12597160041332245, "learning_rate": 1e-06, "loss": 0.0019, "step": 728 }, { "clip_ratio/high_max": 0.0026826402463484555, "clip_ratio/high_mean": 0.0011313702489132993, "clip_ratio/low_mean": 0.0013665519873029552, "clip_ratio/low_min": 0.00012475447510951199, "clip_ratio/region_mean": 0.0024979221852845512, "epoch": 0.06803982990042524, "grad_norm": 0.10503245145082474, "learning_rate": 1e-06, "loss": 0.0321, "step": 729 }, { "clip_ratio/high_max": 0.002960867976071313, "clip_ratio/high_mean": 0.0012720336235361174, "clip_ratio/low_mean": 0.0013114199246047065, "clip_ratio/low_min": 0.00011685091521940194, "clip_ratio/region_mean": 0.002583453526312951, "epoch": 0.06813316300042584, "grad_norm": 0.2732866704463959, "learning_rate": 1e-06, "loss": -0.0046, "step": 730 }, { "clip_ratio/high_max": 0.002719235031690914, "clip_ratio/high_mean": 0.001160748168331338, "clip_ratio/low_mean": 0.0014644000912085176, "clip_ratio/low_min": 0.0001880654162960127, "clip_ratio/region_mean": 0.0026251482486259192, "epoch": 0.06822649610042641, "grad_norm": 0.11016158014535904, "learning_rate": 1e-06, "loss": 0.0422, "step": 731 }, { "clip_ratio/high_max": 0.0023739947100693826, "clip_ratio/high_mean": 0.0010047666419268353, "clip_ratio/low_mean": 0.0014650142729806248, "clip_ratio/low_min": 0.00016231764584517805, "clip_ratio/region_mean": 0.0024697809349163435, "epoch": 0.06831982920042699, "grad_norm": 0.11983786523342133, "learning_rate": 1e-06, "loss": 0.0787, "step": 732 }, { "clip_ratio/high_max": 0.00272560837038327, "clip_ratio/high_mean": 0.0012138942620367743, "clip_ratio/low_mean": 0.0012309012290643295, "clip_ratio/low_min": 0.00011816013829957228, "clip_ratio/region_mean": 0.0024447954856441356, "epoch": 0.06841316230042759, "grad_norm": 0.10642242431640625, "learning_rate": 1e-06, "loss": 0.0253, "step": 733 }, { "clip_ratio/high_max": 0.002649971909704618, "clip_ratio/high_mean": 0.001168540577054955, "clip_ratio/low_mean": 0.001448224727937486, "clip_ratio/low_min": 0.00021643979016516823, "clip_ratio/region_mean": 0.002616765268612653, "epoch": 0.06850649540042816, "grad_norm": 0.11385363340377808, "learning_rate": 1e-06, "loss": 0.0491, "step": 734 }, { "clip_ratio/high_max": 0.0028327024629106745, "clip_ratio/high_mean": 0.0012554569984786212, "clip_ratio/low_mean": 0.0013119755385559984, "clip_ratio/low_min": 2.8936887247255072e-05, "clip_ratio/region_mean": 0.002567432551586535, "epoch": 0.06859982850042876, "grad_norm": 0.11786264926195145, "learning_rate": 1e-06, "loss": 0.0242, "step": 735 }, { "clip_ratio/high_max": 0.002444076741085155, "clip_ratio/high_mean": 0.0010475339877302758, "clip_ratio/low_mean": 0.0013390347667154856, "clip_ratio/low_min": 0.00016115663493110333, "clip_ratio/region_mean": 0.0023865687689976767, "epoch": 0.06869316160042933, "grad_norm": 0.11437604576349258, "learning_rate": 1e-06, "loss": 0.0067, "step": 736 }, { "clip_ratio/high_max": 0.0025760113931028172, "clip_ratio/high_mean": 0.0010824208584381267, "clip_ratio/low_mean": 0.001232916072694934, "clip_ratio/low_min": 4.539676956483163e-05, "clip_ratio/region_mean": 0.00231533693295205, "epoch": 0.06878649470042991, "grad_norm": 0.10543030500411987, "learning_rate": 1e-06, "loss": 0.0358, "step": 737 }, { "clip_ratio/high_max": 0.002850871860573534, "clip_ratio/high_mean": 0.0010817082293215208, "clip_ratio/low_mean": 0.0012590585465659387, "clip_ratio/low_min": 0.00010499057316337712, "clip_ratio/region_mean": 0.0023407667904393747, "epoch": 0.0688798278004305, "grad_norm": 0.11870167404413223, "learning_rate": 1e-06, "loss": 0.0418, "step": 738 }, { "clip_ratio/high_max": 0.00297409145423444, "clip_ratio/high_mean": 0.0013074675953248516, "clip_ratio/low_mean": 0.0012806278282369021, "clip_ratio/low_min": 0.0002119890214089537, "clip_ratio/region_mean": 0.0025880953908199444, "epoch": 0.06897316090043108, "grad_norm": 0.13427087664604187, "learning_rate": 1e-06, "loss": -0.0024, "step": 739 }, { "clip_ratio/high_max": 0.0025721776619320735, "clip_ratio/high_mean": 0.0012095088732166914, "clip_ratio/low_mean": 0.0012556532874441473, "clip_ratio/low_min": 0.000200507942281547, "clip_ratio/region_mean": 0.002465162193402648, "epoch": 0.06906649400043166, "grad_norm": 0.11577577143907547, "learning_rate": 1e-06, "loss": -0.0034, "step": 740 }, { "clip_ratio/high_max": 0.002934826072305441, "clip_ratio/high_mean": 0.0012419092599884607, "clip_ratio/low_mean": 0.0012650964144995669, "clip_ratio/low_min": 0.0001582368367962772, "clip_ratio/region_mean": 0.0025070056872209534, "epoch": 0.06915982710043225, "grad_norm": 0.1072765663266182, "learning_rate": 1e-06, "loss": 0.0219, "step": 741 }, { "clip_ratio/high_max": 0.0024932229352998547, "clip_ratio/high_mean": 0.0009867256394500146, "clip_ratio/low_mean": 0.0011244362340221414, "clip_ratio/low_min": 0.00014509032644127728, "clip_ratio/region_mean": 0.0021111618989380077, "epoch": 0.06925316020043283, "grad_norm": 0.09720559418201447, "learning_rate": 1e-06, "loss": 0.0583, "step": 742 }, { "clip_ratio/high_max": 0.0031298769026761875, "clip_ratio/high_mean": 0.001249256940354826, "clip_ratio/low_mean": 0.0013765303701802623, "clip_ratio/low_min": 8.651172993268119e-05, "clip_ratio/region_mean": 0.0026257873250870034, "epoch": 0.06934649330043341, "grad_norm": 0.12963922321796417, "learning_rate": 1e-06, "loss": 0.019, "step": 743 }, { "clip_ratio/high_max": 0.0030056244577281177, "clip_ratio/high_mean": 0.0012920232547912747, "clip_ratio/low_mean": 0.0013579841397586279, "clip_ratio/low_min": 0.00018727317637967644, "clip_ratio/region_mean": 0.002650007460033521, "epoch": 0.069439826400434, "grad_norm": 0.1084214374423027, "learning_rate": 1e-06, "loss": 0.0018, "step": 744 }, { "clip_ratio/high_max": 0.0022582934543606825, "clip_ratio/high_mean": 0.0010689882383303484, "clip_ratio/low_mean": 0.0014702399348607287, "clip_ratio/low_min": 0.00028344635938992724, "clip_ratio/region_mean": 0.002539228131354321, "epoch": 0.06953315950043458, "grad_norm": 0.11832050234079361, "learning_rate": 1e-06, "loss": 0.0976, "step": 745 }, { "clip_ratio/high_max": 0.002255209503346123, "clip_ratio/high_mean": 0.0010784399892145302, "clip_ratio/low_mean": 0.0010898211221501697, "clip_ratio/low_min": 3.723439294844866e-05, "clip_ratio/region_mean": 0.002168261067708954, "epoch": 0.06962649260043517, "grad_norm": 0.1100601777434349, "learning_rate": 1e-06, "loss": 0.0595, "step": 746 }, { "clip_ratio/high_max": 0.0026475909544387832, "clip_ratio/high_mean": 0.0011875226664415095, "clip_ratio/low_mean": 0.0014723654276167508, "clip_ratio/low_min": 0.0002479485392541392, "clip_ratio/region_mean": 0.0026598880504025146, "epoch": 0.06971982570043575, "grad_norm": 0.10789845883846283, "learning_rate": 1e-06, "loss": 0.0307, "step": 747 }, { "clip_ratio/high_max": 0.003017264862137381, "clip_ratio/high_mean": 0.0013137899659341201, "clip_ratio/low_mean": 0.0014057157313800417, "clip_ratio/low_min": 0.00024162018235074356, "clip_ratio/region_mean": 0.00271950573369395, "epoch": 0.06981315880043633, "grad_norm": 0.11357614398002625, "learning_rate": 1e-06, "loss": -0.0033, "step": 748 }, { "clip_ratio/high_max": 0.0025546381875756197, "clip_ratio/high_mean": 0.0011461432222859003, "clip_ratio/low_mean": 0.0015304677654057741, "clip_ratio/low_min": 0.0003436168626649305, "clip_ratio/region_mean": 0.0026766110095195472, "epoch": 0.06990649190043692, "grad_norm": 0.10405183583498001, "learning_rate": 1e-06, "loss": 0.0235, "step": 749 }, { "clip_ratio/high_max": 0.003007886822160799, "clip_ratio/high_mean": 0.0012398003673297353, "clip_ratio/low_mean": 0.0011930224791285582, "clip_ratio/low_min": 0.00013628236047225073, "clip_ratio/region_mean": 0.002432822839182336, "epoch": 0.0699998250004375, "grad_norm": 0.10911568999290466, "learning_rate": 1e-06, "loss": 0.0061, "step": 750 }, { "clip_ratio/high_max": 0.0030501093424391, "clip_ratio/high_mean": 0.0013584345470007975, "clip_ratio/low_mean": 0.0012423960179148708, "clip_ratio/low_min": 4.5048564970784355e-05, "clip_ratio/region_mean": 0.0026008305358118378, "epoch": 0.07009315810043808, "grad_norm": 0.1271698772907257, "learning_rate": 1e-06, "loss": 0.0146, "step": 751 }, { "clip_ratio/high_max": 0.003073713342018891, "clip_ratio/high_mean": 0.0012448882771423087, "clip_ratio/low_mean": 0.0012325837269600015, "clip_ratio/low_min": 7.209786963358056e-05, "clip_ratio/region_mean": 0.0024774719204287976, "epoch": 0.07018649120043867, "grad_norm": 0.11275521665811539, "learning_rate": 1e-06, "loss": -0.0127, "step": 752 }, { "clip_ratio/high_max": 0.002723751800658647, "clip_ratio/high_mean": 0.0011510503572935704, "clip_ratio/low_mean": 0.0012933056059409864, "clip_ratio/low_min": 0.00015398129653476644, "clip_ratio/region_mean": 0.002444355937768705, "epoch": 0.07027982430043925, "grad_norm": 0.11377551406621933, "learning_rate": 1e-06, "loss": 0.0368, "step": 753 }, { "clip_ratio/high_max": 0.002962904422020074, "clip_ratio/high_mean": 0.0013752389058936387, "clip_ratio/low_mean": 0.0012707047862932086, "clip_ratio/low_min": 5.269622943160357e-05, "clip_ratio/region_mean": 0.0026459436703589745, "epoch": 0.07037315740043983, "grad_norm": 0.1154618039727211, "learning_rate": 1e-06, "loss": 0.0017, "step": 754 }, { "clip_ratio/high_max": 0.0028729629411827773, "clip_ratio/high_mean": 0.0010724228050094098, "clip_ratio/low_mean": 0.0013544600406021345, "clip_ratio/low_min": 0.00017528566422697622, "clip_ratio/region_mean": 0.0024268827692139894, "epoch": 0.07046649050044042, "grad_norm": 0.13094009459018707, "learning_rate": 1e-06, "loss": 0.0437, "step": 755 }, { "clip_ratio/high_max": 0.00311863864044426, "clip_ratio/high_mean": 0.0012684028824878624, "clip_ratio/low_mean": 0.0012262765280866006, "clip_ratio/low_min": 0.0001873908331617713, "clip_ratio/region_mean": 0.0024946794073912315, "epoch": 0.070559823600441, "grad_norm": 0.1197633147239685, "learning_rate": 1e-06, "loss": 0.0287, "step": 756 }, { "clip_ratio/high_max": 0.002939700265415013, "clip_ratio/high_mean": 0.0012404045919538476, "clip_ratio/low_mean": 0.0013211458826845046, "clip_ratio/low_min": 1.6464699001517147e-05, "clip_ratio/region_mean": 0.0025615505219320767, "epoch": 0.07065315670044159, "grad_norm": 0.11195140331983566, "learning_rate": 1e-06, "loss": -0.0134, "step": 757 }, { "clip_ratio/high_max": 0.002776704881398473, "clip_ratio/high_mean": 0.0011888511617144104, "clip_ratio/low_mean": 0.0014347892101795878, "clip_ratio/low_min": 0.00023000847249932121, "clip_ratio/region_mean": 0.002623640415549744, "epoch": 0.07074648980044217, "grad_norm": 0.11405761539936066, "learning_rate": 1e-06, "loss": 0.0614, "step": 758 }, { "clip_ratio/high_max": 0.002627781381306704, "clip_ratio/high_mean": 0.0012184412153146695, "clip_ratio/low_mean": 0.0012199986958876252, "clip_ratio/low_min": 0.00015227431140374392, "clip_ratio/region_mean": 0.002438439922116231, "epoch": 0.07083982290044274, "grad_norm": 0.11139947175979614, "learning_rate": 1e-06, "loss": 0.0033, "step": 759 }, { "clip_ratio/high_max": 0.002774520566163119, "clip_ratio/high_mean": 0.001122625675634481, "clip_ratio/low_mean": 0.0014449931913986802, "clip_ratio/low_min": 0.00021774545439257054, "clip_ratio/region_mean": 0.0025676188088255003, "epoch": 0.07093315600044334, "grad_norm": 0.1145111620426178, "learning_rate": 1e-06, "loss": 0.0457, "step": 760 }, { "clip_ratio/high_max": 0.0027189368920517154, "clip_ratio/high_mean": 0.0011743571376428008, "clip_ratio/low_mean": 0.0014953151876397897, "clip_ratio/low_min": 0.00026165100007347064, "clip_ratio/region_mean": 0.0026696724526118487, "epoch": 0.07102648910044392, "grad_norm": 0.10632248222827911, "learning_rate": 1e-06, "loss": 0.0162, "step": 761 }, { "clip_ratio/high_max": 0.002787663324852474, "clip_ratio/high_mean": 0.0011830617731902748, "clip_ratio/low_mean": 0.0015124076016945764, "clip_ratio/low_min": 5.873068676010007e-05, "clip_ratio/region_mean": 0.0026954693530569784, "epoch": 0.0711198222004445, "grad_norm": 0.1719726324081421, "learning_rate": 1e-06, "loss": 0.0374, "step": 762 }, { "clip_ratio/high_max": 0.003003754078235943, "clip_ratio/high_mean": 0.0012541587675514165, "clip_ratio/low_mean": 0.0013991387786518317, "clip_ratio/low_min": 7.044962512736674e-05, "clip_ratio/region_mean": 0.0026532975170994177, "epoch": 0.07121315530044509, "grad_norm": 0.11904733628034592, "learning_rate": 1e-06, "loss": 0.0186, "step": 763 }, { "clip_ratio/high_max": 0.0033765009065973572, "clip_ratio/high_mean": 0.001281166980334092, "clip_ratio/low_mean": 0.0015819539039512165, "clip_ratio/low_min": 0.00016568659793847473, "clip_ratio/region_mean": 0.0028631208988372236, "epoch": 0.07130648840044566, "grad_norm": 0.11197473853826523, "learning_rate": 1e-06, "loss": 0.0364, "step": 764 }, { "clip_ratio/high_max": 0.002852642923244275, "clip_ratio/high_mean": 0.0011721480223059189, "clip_ratio/low_mean": 0.0013013201169087552, "clip_ratio/low_min": 6.967284753045533e-05, "clip_ratio/region_mean": 0.002473468099196907, "epoch": 0.07139982150044624, "grad_norm": 0.11385203897953033, "learning_rate": 1e-06, "loss": -0.0272, "step": 765 }, { "clip_ratio/high_max": 0.002533957340347115, "clip_ratio/high_mean": 0.0011181412955920678, "clip_ratio/low_mean": 0.0014496797302854247, "clip_ratio/low_min": 0.00020601628239091951, "clip_ratio/region_mean": 0.0025678210076875985, "epoch": 0.07149315460044683, "grad_norm": 0.10289259999990463, "learning_rate": 1e-06, "loss": 0.0521, "step": 766 }, { "clip_ratio/high_max": 0.0025922762506525032, "clip_ratio/high_mean": 0.0010080376850964967, "clip_ratio/low_mean": 0.001377177017275244, "clip_ratio/low_min": 0.00015607318982802099, "clip_ratio/region_mean": 0.002385214676905889, "epoch": 0.07158648770044741, "grad_norm": 0.11131215840578079, "learning_rate": 1e-06, "loss": 0.0201, "step": 767 }, { "clip_ratio/high_max": 0.002780564631393645, "clip_ratio/high_mean": 0.001201951326947892, "clip_ratio/low_mean": 0.0011877912329509854, "clip_ratio/low_min": 5.9996538766426966e-05, "clip_ratio/region_mean": 0.0023897426362964325, "epoch": 0.071679820800448, "grad_norm": 0.10477583855390549, "learning_rate": 1e-06, "loss": 0.0101, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011204310825892905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 603.15576171875, "completions/mean_terminated_length": 563.577392578125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.07177315390044858, "grad_norm": 0.12820403277873993, "learning_rate": 1e-06, "loss": 0.0217, "num_tokens": 566593954.0, "reward": 0.597115695476532, "reward_std": 0.18933433294296265, "rewards/simpleverify_reward/mean": 0.5971156358718872, "rewards/simpleverify_reward/std": 0.4904799163341522, "step": 769 }, { "clip_ratio/high_max": 0.002051092989859171, "clip_ratio/high_mean": 0.0009391540825163247, "clip_ratio/low_mean": 0.0005050464451414882, "clip_ratio/low_min": 1.314405926677864e-05, "clip_ratio/region_mean": 0.0014442005449382123, "epoch": 0.07186648700044916, "grad_norm": 0.12566576898097992, "learning_rate": 1e-06, "loss": -0.0002, "step": 770 }, { "clip_ratio/high_max": 0.0017706296130199917, "clip_ratio/high_mean": 0.00081976316505461, "clip_ratio/low_mean": 0.0007088625784490432, "clip_ratio/low_min": 5.78703666178626e-05, "clip_ratio/region_mean": 0.0015286257330444641, "epoch": 0.07195982010044975, "grad_norm": 0.11572977900505066, "learning_rate": 1e-06, "loss": 0.0479, "step": 771 }, { "clip_ratio/high_max": 0.002138843272405211, "clip_ratio/high_mean": 0.0008725789120944683, "clip_ratio/low_mean": 0.0007034297286736546, "clip_ratio/low_min": 1.4172335795592517e-05, "clip_ratio/region_mean": 0.0015760086243972182, "epoch": 0.07205315320045033, "grad_norm": 0.11675462871789932, "learning_rate": 1e-06, "loss": 0.0509, "step": 772 }, { "clip_ratio/high_max": 0.0020723529378301464, "clip_ratio/high_mean": 0.0008848889883665834, "clip_ratio/low_mean": 0.0007510084979003295, "clip_ratio/low_min": 3.8654676245641895e-05, "clip_ratio/region_mean": 0.001635897482628934, "epoch": 0.07214648630045091, "grad_norm": 0.11294642090797424, "learning_rate": 1e-06, "loss": 0.0277, "step": 773 }, { "clip_ratio/high_max": 0.002080842066789046, "clip_ratio/high_mean": 0.000885394414581242, "clip_ratio/low_mean": 0.0006023007954354398, "clip_ratio/low_min": 2.299259267601883e-05, "clip_ratio/region_mean": 0.0014876952482154593, "epoch": 0.0722398194004515, "grad_norm": 0.09766869992017746, "learning_rate": 1e-06, "loss": -0.0242, "step": 774 }, { "clip_ratio/high_max": 0.0022430263343267143, "clip_ratio/high_mean": 0.0008442738326266408, "clip_ratio/low_mean": 0.0007915704281913349, "clip_ratio/low_min": 4.910353345621843e-05, "clip_ratio/region_mean": 0.0016358441862394102, "epoch": 0.07233315250045208, "grad_norm": 0.12099959701299667, "learning_rate": 1e-06, "loss": 0.0325, "step": 775 }, { "clip_ratio/high_max": 0.0021076721350254957, "clip_ratio/high_mean": 0.0008769266496528871, "clip_ratio/low_mean": 0.0008853892832121346, "clip_ratio/low_min": 7.717847256571986e-05, "clip_ratio/region_mean": 0.0017623159365030006, "epoch": 0.07242648560045266, "grad_norm": 0.11272159218788147, "learning_rate": 1e-06, "loss": 0.016, "step": 776 }, { "clip_ratio/high_max": 0.0020168940354778897, "clip_ratio/high_mean": 0.0008096988167380914, "clip_ratio/low_mean": 0.0007753350128041347, "clip_ratio/low_min": 5.0387608098390047e-05, "clip_ratio/region_mean": 0.0015850338349991944, "epoch": 0.07251981870045325, "grad_norm": 0.1247406154870987, "learning_rate": 1e-06, "loss": 0.0627, "step": 777 }, { "clip_ratio/high_max": 0.002209799386037048, "clip_ratio/high_mean": 0.0009233534183294978, "clip_ratio/low_mean": 0.0009001679973152932, "clip_ratio/low_min": 4.590253774949815e-05, "clip_ratio/region_mean": 0.0018235213719890453, "epoch": 0.07261315180045383, "grad_norm": 0.11831159144639969, "learning_rate": 1e-06, "loss": 0.0242, "step": 778 }, { "clip_ratio/high_max": 0.0025225516292266548, "clip_ratio/high_mean": 0.0011233956065552775, "clip_ratio/low_mean": 0.0006616796999878716, "clip_ratio/low_min": 1.2649261407204904e-05, "clip_ratio/region_mean": 0.0017850752919912338, "epoch": 0.07270648490045442, "grad_norm": 0.11290524899959564, "learning_rate": 1e-06, "loss": -0.0551, "step": 779 }, { "clip_ratio/high_max": 0.0025450853863731027, "clip_ratio/high_mean": 0.0009829581504163798, "clip_ratio/low_mean": 0.0008543614640075248, "clip_ratio/low_min": 3.2444766475236975e-05, "clip_ratio/region_mean": 0.0018373196216998622, "epoch": 0.072799818000455, "grad_norm": 0.10938029736280441, "learning_rate": 1e-06, "loss": 0.0388, "step": 780 }, { "clip_ratio/high_max": 0.002183779033657629, "clip_ratio/high_mean": 0.0008654488938191207, "clip_ratio/low_mean": 0.000757923505261715, "clip_ratio/low_min": 5.14010462211445e-05, "clip_ratio/region_mean": 0.0016233724090852775, "epoch": 0.07289315110045558, "grad_norm": 0.12256066501140594, "learning_rate": 1e-06, "loss": 0.0489, "step": 781 }, { "clip_ratio/high_max": 0.002266704206704162, "clip_ratio/high_mean": 0.0010063832487503532, "clip_ratio/low_mean": 0.0008115047530736774, "clip_ratio/low_min": 2.5297049432992935e-05, "clip_ratio/region_mean": 0.001817888012737967, "epoch": 0.07298648420045617, "grad_norm": 0.11712535470724106, "learning_rate": 1e-06, "loss": -0.0269, "step": 782 }, { "clip_ratio/high_max": 0.0021909744609729387, "clip_ratio/high_mean": 0.0009347949526272714, "clip_ratio/low_mean": 0.0008700080306880409, "clip_ratio/low_min": 9.440225949219894e-05, "clip_ratio/region_mean": 0.001804803017876111, "epoch": 0.07307981730045675, "grad_norm": 0.11367373168468475, "learning_rate": 1e-06, "loss": -0.0085, "step": 783 }, { "clip_ratio/high_max": 0.0021500897128134966, "clip_ratio/high_mean": 0.0010276847569912206, "clip_ratio/low_mean": 0.0010036926905740984, "clip_ratio/low_min": 2.5253820240322966e-05, "clip_ratio/region_mean": 0.002031377429375425, "epoch": 0.07317315040045733, "grad_norm": 0.13200776278972626, "learning_rate": 1e-06, "loss": 0.0439, "step": 784 }, { "clip_ratio/high_max": 0.0023409711284330115, "clip_ratio/high_mean": 0.0008731545967748389, "clip_ratio/low_mean": 0.0009059252279257635, "clip_ratio/low_min": 8.034350230445853e-05, "clip_ratio/region_mean": 0.0017790797865018249, "epoch": 0.07326648350045792, "grad_norm": 0.11846556514501572, "learning_rate": 1e-06, "loss": 0.0278, "step": 785 }, { "clip_ratio/high_max": 0.0026085656136274338, "clip_ratio/high_mean": 0.0010803904078784399, "clip_ratio/low_mean": 0.0010722352089942433, "clip_ratio/low_min": 0.00010050079072243534, "clip_ratio/region_mean": 0.0021526256314245984, "epoch": 0.0733598166004585, "grad_norm": 0.12004795670509338, "learning_rate": 1e-06, "loss": -0.0085, "step": 786 }, { "clip_ratio/high_max": 0.002253969170851633, "clip_ratio/high_mean": 0.0009531120922474656, "clip_ratio/low_mean": 0.0011178096883668331, "clip_ratio/low_min": 0.00016803065591375344, "clip_ratio/region_mean": 0.002070921786071267, "epoch": 0.07345314970045909, "grad_norm": 0.12405817210674286, "learning_rate": 1e-06, "loss": 0.0162, "step": 787 }, { "clip_ratio/high_max": 0.0019295849560876377, "clip_ratio/high_mean": 0.00087690779400873, "clip_ratio/low_mean": 0.0011875842574227136, "clip_ratio/low_min": 8.876276933733607e-05, "clip_ratio/region_mean": 0.0020644920659833588, "epoch": 0.07354648280045967, "grad_norm": 0.12075860053300858, "learning_rate": 1e-06, "loss": 0.03, "step": 788 }, { "clip_ratio/high_max": 0.0025981288636103272, "clip_ratio/high_mean": 0.0009626758183003403, "clip_ratio/low_mean": 0.0009141045484284405, "clip_ratio/low_min": 4.4804218305216637e-05, "clip_ratio/region_mean": 0.0018767803339869715, "epoch": 0.07363981590046025, "grad_norm": 0.11440103501081467, "learning_rate": 1e-06, "loss": 0.0225, "step": 789 }, { "clip_ratio/high_max": 0.002327212929230882, "clip_ratio/high_mean": 0.000994183979855734, "clip_ratio/low_mean": 0.000941378173592966, "clip_ratio/low_min": 7.623283181601437e-05, "clip_ratio/region_mean": 0.0019355622061993927, "epoch": 0.07373314900046084, "grad_norm": 0.09917795658111572, "learning_rate": 1e-06, "loss": 0.0201, "step": 790 }, { "clip_ratio/high_max": 0.0023168384868768044, "clip_ratio/high_mean": 0.0008904602800612338, "clip_ratio/low_mean": 0.0010573219024081482, "clip_ratio/low_min": 0.00013963030414743116, "clip_ratio/region_mean": 0.0019477821624604985, "epoch": 0.07382648210046142, "grad_norm": 0.10493776202201843, "learning_rate": 1e-06, "loss": 0.0476, "step": 791 }, { "clip_ratio/high_max": 0.0022290104898274876, "clip_ratio/high_mean": 0.0009588014254404698, "clip_ratio/low_mean": 0.001075389522156911, "clip_ratio/low_min": 7.14579364284873e-05, "clip_ratio/region_mean": 0.0020341909112175927, "epoch": 0.073919815200462, "grad_norm": 0.12100069224834442, "learning_rate": 1e-06, "loss": 0.026, "step": 792 }, { "clip_ratio/high_max": 0.002559536565968301, "clip_ratio/high_mean": 0.001010480151308002, "clip_ratio/low_mean": 0.0011545784673216986, "clip_ratio/low_min": 0.00010771318920888007, "clip_ratio/region_mean": 0.002165058584068902, "epoch": 0.07401314830046259, "grad_norm": 0.12782274186611176, "learning_rate": 1e-06, "loss": 0.0221, "step": 793 }, { "clip_ratio/high_max": 0.0025937915925169364, "clip_ratio/high_mean": 0.001009241557767382, "clip_ratio/low_mean": 0.001053117917763302, "clip_ratio/low_min": 4.2666571971494704e-05, "clip_ratio/region_mean": 0.0020623594100470655, "epoch": 0.07410648140046316, "grad_norm": 0.11918506771326065, "learning_rate": 1e-06, "loss": 0.0338, "step": 794 }, { "clip_ratio/high_max": 0.0023571307319798507, "clip_ratio/high_mean": 0.0009932012726494577, "clip_ratio/low_mean": 0.0010559585298324237, "clip_ratio/low_min": 0.00020315511392254848, "clip_ratio/region_mean": 0.0020491597679210827, "epoch": 0.07419981450046374, "grad_norm": 0.11347195506095886, "learning_rate": 1e-06, "loss": -0.0092, "step": 795 }, { "clip_ratio/high_max": 0.0025715811243571807, "clip_ratio/high_mean": 0.0009562175309838494, "clip_ratio/low_mean": 0.001068593968739151, "clip_ratio/low_min": 0.00011595513751672115, "clip_ratio/region_mean": 0.0020248114597052336, "epoch": 0.07429314760046433, "grad_norm": 0.10754869878292084, "learning_rate": 1e-06, "loss": 0.0308, "step": 796 }, { "clip_ratio/high_max": 0.0024075661385722924, "clip_ratio/high_mean": 0.0010922811015916523, "clip_ratio/low_mean": 0.0011443973144196207, "clip_ratio/low_min": 0.00011810382693511201, "clip_ratio/region_mean": 0.0022366784323821776, "epoch": 0.07438648070046491, "grad_norm": 0.11134686321020126, "learning_rate": 1e-06, "loss": -0.0014, "step": 797 }, { "clip_ratio/high_max": 0.0018771039358398411, "clip_ratio/high_mean": 0.0008169102638930781, "clip_ratio/low_mean": 0.0009692325202195207, "clip_ratio/low_min": 2.715423579502385e-05, "clip_ratio/region_mean": 0.0017861428023024928, "epoch": 0.0744798138004655, "grad_norm": 0.1033652126789093, "learning_rate": 1e-06, "loss": 0.0151, "step": 798 }, { "clip_ratio/high_max": 0.002284582624270115, "clip_ratio/high_mean": 0.0009443743456358789, "clip_ratio/low_mean": 0.0011741655907826498, "clip_ratio/low_min": 0.00012079526277375408, "clip_ratio/region_mean": 0.0021185399091336876, "epoch": 0.07457314690046608, "grad_norm": 0.11647457629442215, "learning_rate": 1e-06, "loss": 0.0343, "step": 799 }, { "clip_ratio/high_max": 0.002678997792827431, "clip_ratio/high_mean": 0.0010899615590460598, "clip_ratio/low_mean": 0.0011122225332655944, "clip_ratio/low_min": 0.00017038251644407865, "clip_ratio/region_mean": 0.002202184099587612, "epoch": 0.07466648000046666, "grad_norm": 0.12350889295339584, "learning_rate": 1e-06, "loss": 0.0484, "step": 800 }, { "clip_ratio/high_max": 0.002412290203210432, "clip_ratio/high_mean": 0.0010598797707643826, "clip_ratio/low_mean": 0.0009714974657981656, "clip_ratio/low_min": 5.190667070564814e-05, "clip_ratio/region_mean": 0.0020313772329245694, "epoch": 0.07475981310046725, "grad_norm": 0.11538942158222198, "learning_rate": 1e-06, "loss": 0.0348, "step": 801 }, { "clip_ratio/high_max": 0.002418118150671944, "clip_ratio/high_mean": 0.0010661182641342748, "clip_ratio/low_mean": 0.0009209596519212937, "clip_ratio/low_min": 8.859075023792684e-05, "clip_ratio/region_mean": 0.001987077877856791, "epoch": 0.07485314620046783, "grad_norm": 0.10948257148265839, "learning_rate": 1e-06, "loss": 0.0077, "step": 802 }, { "clip_ratio/high_max": 0.0023883522299001925, "clip_ratio/high_mean": 0.0010372653468948556, "clip_ratio/low_mean": 0.0009967588193831034, "clip_ratio/low_min": 4.212394560454413e-05, "clip_ratio/region_mean": 0.002034024197200779, "epoch": 0.07494647930046841, "grad_norm": 0.10789617151021957, "learning_rate": 1e-06, "loss": 0.0096, "step": 803 }, { "clip_ratio/high_max": 0.002301316511875484, "clip_ratio/high_mean": 0.0009994779984481283, "clip_ratio/low_mean": 0.0010736448102761642, "clip_ratio/low_min": 0.00016360287372663151, "clip_ratio/region_mean": 0.0020731228087242926, "epoch": 0.075039812400469, "grad_norm": 0.11090195178985596, "learning_rate": 1e-06, "loss": 0.016, "step": 804 }, { "clip_ratio/high_max": 0.002645769083756022, "clip_ratio/high_mean": 0.001123054989875527, "clip_ratio/low_mean": 0.0011681924370350316, "clip_ratio/low_min": 7.465640555892605e-05, "clip_ratio/region_mean": 0.0022912474451004528, "epoch": 0.07513314550046958, "grad_norm": 0.25976425409317017, "learning_rate": 1e-06, "loss": 0.0248, "step": 805 }, { "clip_ratio/high_max": 0.002684735634829849, "clip_ratio/high_mean": 0.0010951919430226553, "clip_ratio/low_mean": 0.0009336313232779503, "clip_ratio/low_min": 0.00011956647813349264, "clip_ratio/region_mean": 0.00202882331359433, "epoch": 0.07522647860047016, "grad_norm": 0.11199431121349335, "learning_rate": 1e-06, "loss": 0.0388, "step": 806 }, { "clip_ratio/high_max": 0.002553266785980668, "clip_ratio/high_mean": 0.001117124586016871, "clip_ratio/low_mean": 0.001148157760326285, "clip_ratio/low_min": 0.00015109952801140025, "clip_ratio/region_mean": 0.0022652823172393255, "epoch": 0.07531981170047075, "grad_norm": 0.1271596997976303, "learning_rate": 1e-06, "loss": 0.0078, "step": 807 }, { "clip_ratio/high_max": 0.002470468491083011, "clip_ratio/high_mean": 0.0009934043700923212, "clip_ratio/low_mean": 0.001109728324081516, "clip_ratio/low_min": 3.391439986444311e-05, "clip_ratio/region_mean": 0.0021031326832599007, "epoch": 0.07541314480047133, "grad_norm": 0.12198836356401443, "learning_rate": 1e-06, "loss": 0.0278, "step": 808 }, { "clip_ratio/high_max": 0.002124332979292376, "clip_ratio/high_mean": 0.0009733429669722682, "clip_ratio/low_mean": 0.0012702129133685958, "clip_ratio/low_min": 9.579544712323695e-05, "clip_ratio/region_mean": 0.0022435559003497474, "epoch": 0.07550647790047192, "grad_norm": 0.11199678480625153, "learning_rate": 1e-06, "loss": 0.0639, "step": 809 }, { "clip_ratio/high_max": 0.0021873594014323317, "clip_ratio/high_mean": 0.000937352799155633, "clip_ratio/low_mean": 0.0012296169243199984, "clip_ratio/low_min": 0.00017922391816682648, "clip_ratio/region_mean": 0.0021669697380275466, "epoch": 0.0755998110004725, "grad_norm": 0.11988547444343567, "learning_rate": 1e-06, "loss": 0.0347, "step": 810 }, { "clip_ratio/high_max": 0.0026810174676938914, "clip_ratio/high_mean": 0.0012048442476952914, "clip_ratio/low_mean": 0.001280928925552871, "clip_ratio/low_min": 0.0001074354713637149, "clip_ratio/region_mean": 0.0024857731550582685, "epoch": 0.07569314410047308, "grad_norm": 0.1250828206539154, "learning_rate": 1e-06, "loss": 0.0015, "step": 811 }, { "clip_ratio/high_max": 0.002780757102300413, "clip_ratio/high_mean": 0.0011945388760068454, "clip_ratio/low_mean": 0.0011200920780538581, "clip_ratio/low_min": 6.386037784977816e-05, "clip_ratio/region_mean": 0.002314631004992407, "epoch": 0.07578647720047367, "grad_norm": 0.12443851679563522, "learning_rate": 1e-06, "loss": 0.0209, "step": 812 }, { "clip_ratio/high_max": 0.002125648872606689, "clip_ratio/high_mean": 0.0008933621629694244, "clip_ratio/low_mean": 0.0010431468108436093, "clip_ratio/low_min": 9.345857870357577e-05, "clip_ratio/region_mean": 0.0019365089538041502, "epoch": 0.07587981030047425, "grad_norm": 0.09821508079767227, "learning_rate": 1e-06, "loss": 0.0161, "step": 813 }, { "clip_ratio/high_max": 0.0027740029072447214, "clip_ratio/high_mean": 0.0010859190260816831, "clip_ratio/low_mean": 0.0010761445282696513, "clip_ratio/low_min": 9.192534071189584e-05, "clip_ratio/region_mean": 0.0021620636034640484, "epoch": 0.07597314340047483, "grad_norm": 0.11543142795562744, "learning_rate": 1e-06, "loss": -0.0134, "step": 814 }, { "clip_ratio/high_max": 0.002336179051781073, "clip_ratio/high_mean": 0.001012662069115322, "clip_ratio/low_mean": 0.0011903145041287644, "clip_ratio/low_min": 8.642133798275609e-05, "clip_ratio/region_mean": 0.00220297653140733, "epoch": 0.07606647650047542, "grad_norm": 0.11538077145814896, "learning_rate": 1e-06, "loss": 0.0063, "step": 815 }, { "clip_ratio/high_max": 0.0026883843675022945, "clip_ratio/high_mean": 0.0012067052703059744, "clip_ratio/low_mean": 0.0011605535364651587, "clip_ratio/low_min": 4.938949132338166e-05, "clip_ratio/region_mean": 0.002367258806771133, "epoch": 0.076159809600476, "grad_norm": 0.11419616639614105, "learning_rate": 1e-06, "loss": 0.0138, "step": 816 }, { "clip_ratio/high_max": 0.002302663488080725, "clip_ratio/high_mean": 0.0009473786285525421, "clip_ratio/low_mean": 0.0014439004808082245, "clip_ratio/low_min": 0.00021686234958906425, "clip_ratio/region_mean": 0.0023912791366456077, "epoch": 0.07625314270047658, "grad_norm": 0.12642024457454681, "learning_rate": 1e-06, "loss": 0.0611, "step": 817 }, { "clip_ratio/high_max": 0.0024478775449097157, "clip_ratio/high_mean": 0.001012768603686709, "clip_ratio/low_mean": 0.001272786124900449, "clip_ratio/low_min": 0.00011893514238181524, "clip_ratio/region_mean": 0.00228555469220737, "epoch": 0.07634647580047717, "grad_norm": 0.10971798002719879, "learning_rate": 1e-06, "loss": 0.0118, "step": 818 }, { "clip_ratio/high_max": 0.0027245371893513948, "clip_ratio/high_mean": 0.0011951471715292428, "clip_ratio/low_mean": 0.001217750472278567, "clip_ratio/low_min": 2.928316462202929e-05, "clip_ratio/region_mean": 0.0024128976292558946, "epoch": 0.07643980890047775, "grad_norm": 0.1201469674706459, "learning_rate": 1e-06, "loss": 0.0199, "step": 819 }, { "clip_ratio/high_max": 0.0026759453758131713, "clip_ratio/high_mean": 0.0010629039206833113, "clip_ratio/low_mean": 0.001152547964011319, "clip_ratio/low_min": 5.900878204556648e-05, "clip_ratio/region_mean": 0.0022154518810566515, "epoch": 0.07653314200047834, "grad_norm": 0.11807885766029358, "learning_rate": 1e-06, "loss": 0.0153, "step": 820 }, { "clip_ratio/high_max": 0.0023412224036292173, "clip_ratio/high_mean": 0.0010300843787263148, "clip_ratio/low_mean": 0.0012751088288496248, "clip_ratio/low_min": 0.00012151322516729124, "clip_ratio/region_mean": 0.002305193214851897, "epoch": 0.07662647510047892, "grad_norm": 0.11873375624418259, "learning_rate": 1e-06, "loss": 0.0401, "step": 821 }, { "clip_ratio/high_max": 0.0021195502267801203, "clip_ratio/high_mean": 0.0009045373262779322, "clip_ratio/low_mean": 0.0012838650363846682, "clip_ratio/low_min": 7.722931695752777e-05, "clip_ratio/region_mean": 0.002188402315368876, "epoch": 0.0767198082004795, "grad_norm": 0.10836665332317352, "learning_rate": 1e-06, "loss": 0.0599, "step": 822 }, { "clip_ratio/high_max": 0.0021443845907924697, "clip_ratio/high_mean": 0.001007751152428682, "clip_ratio/low_mean": 0.00144003579771379, "clip_ratio/low_min": 0.0002094564324579551, "clip_ratio/region_mean": 0.002447786901029758, "epoch": 0.07681314130048009, "grad_norm": 0.1180749461054802, "learning_rate": 1e-06, "loss": 0.0289, "step": 823 }, { "clip_ratio/high_max": 0.0025228763770428486, "clip_ratio/high_mean": 0.0009985250908357557, "clip_ratio/low_mean": 0.0014108365176070947, "clip_ratio/low_min": 8.019329834496602e-05, "clip_ratio/region_mean": 0.0024093616812024266, "epoch": 0.07690647440048066, "grad_norm": 0.12274163961410522, "learning_rate": 1e-06, "loss": 0.0381, "step": 824 }, { "clip_ratio/high_max": 0.002513311876100488, "clip_ratio/high_mean": 0.0010567716144578299, "clip_ratio/low_mean": 0.001161065109045012, "clip_ratio/low_min": 3.436139286350226e-05, "clip_ratio/region_mean": 0.0022178367580636404, "epoch": 0.07699980750048124, "grad_norm": 0.11557767540216446, "learning_rate": 1e-06, "loss": -0.0128, "step": 825 }, { "clip_ratio/high_max": 0.002527158176235389, "clip_ratio/high_mean": 0.0009697247296571732, "clip_ratio/low_mean": 0.0013765989970124792, "clip_ratio/low_min": 0.00023680822232563514, "clip_ratio/region_mean": 0.0023463237084797584, "epoch": 0.07709314060048184, "grad_norm": 0.11587980389595032, "learning_rate": 1e-06, "loss": 0.0476, "step": 826 }, { "clip_ratio/high_max": 0.002271386307256762, "clip_ratio/high_mean": 0.0009781260323507013, "clip_ratio/low_mean": 0.0012646821451198775, "clip_ratio/low_min": 0.0001422819250365137, "clip_ratio/region_mean": 0.002242808164737653, "epoch": 0.07718647370048241, "grad_norm": 0.10734894871711731, "learning_rate": 1e-06, "loss": 0.0598, "step": 827 }, { "clip_ratio/high_max": 0.002675210627785418, "clip_ratio/high_mean": 0.0011202763416804373, "clip_ratio/low_mean": 0.0011107365862699226, "clip_ratio/low_min": 3.319171446491964e-05, "clip_ratio/region_mean": 0.0022310129133984447, "epoch": 0.07727980680048299, "grad_norm": 0.110447458922863, "learning_rate": 1e-06, "loss": 0.0168, "step": 828 }, { "clip_ratio/high_max": 0.002626883033371996, "clip_ratio/high_mean": 0.0011036710275220685, "clip_ratio/low_mean": 0.0012790057226084173, "clip_ratio/low_min": 0.00019176518617314287, "clip_ratio/region_mean": 0.002382676779234316, "epoch": 0.07737313990048358, "grad_norm": 0.12311238795518875, "learning_rate": 1e-06, "loss": 0.0767, "step": 829 }, { "clip_ratio/high_max": 0.002495641056157183, "clip_ratio/high_mean": 0.0010171380708925426, "clip_ratio/low_mean": 0.0010762759739009198, "clip_ratio/low_min": 0.00010596569427434588, "clip_ratio/region_mean": 0.0020934140411554836, "epoch": 0.07746647300048416, "grad_norm": 0.1093924343585968, "learning_rate": 1e-06, "loss": 0.0101, "step": 830 }, { "clip_ratio/high_max": 0.002840391098288819, "clip_ratio/high_mean": 0.0012321114936639788, "clip_ratio/low_mean": 0.0010165530256927013, "clip_ratio/low_min": 0.00014056388135941233, "clip_ratio/region_mean": 0.0022486645830213092, "epoch": 0.07755980610048475, "grad_norm": 0.11260376870632172, "learning_rate": 1e-06, "loss": -0.0124, "step": 831 }, { "clip_ratio/high_max": 0.0025712110800668597, "clip_ratio/high_mean": 0.0010986134875565767, "clip_ratio/low_mean": 0.0012721074563160073, "clip_ratio/low_min": 0.00018343739793635905, "clip_ratio/region_mean": 0.0023707208965788595, "epoch": 0.07765313920048533, "grad_norm": 0.11848191916942596, "learning_rate": 1e-06, "loss": 0.0329, "step": 832 }, { "clip_ratio/high_max": 0.002364235304412432, "clip_ratio/high_mean": 0.0009977382414945168, "clip_ratio/low_mean": 0.0012987564186914824, "clip_ratio/low_min": 0.00017530275545141194, "clip_ratio/region_mean": 0.002296494654729031, "epoch": 0.07774647230048591, "grad_norm": 0.12366092950105667, "learning_rate": 1e-06, "loss": 0.0599, "step": 833 }, { "clip_ratio/high_max": 0.00294971872062888, "clip_ratio/high_mean": 0.001173786982690217, "clip_ratio/low_mean": 0.0012067906282027252, "clip_ratio/low_min": 0.0001133947771450039, "clip_ratio/region_mean": 0.0023805776072549634, "epoch": 0.0778398054004865, "grad_norm": 0.11560620367527008, "learning_rate": 1e-06, "loss": 0.0053, "step": 834 }, { "clip_ratio/high_max": 0.0028235580102773383, "clip_ratio/high_mean": 0.0011722293529601302, "clip_ratio/low_mean": 0.0012163692772446666, "clip_ratio/low_min": 0.00011307509976177244, "clip_ratio/region_mean": 0.0023885986811365, "epoch": 0.07793313850048708, "grad_norm": 0.1150728091597557, "learning_rate": 1e-06, "loss": 0.027, "step": 835 }, { "clip_ratio/high_max": 0.002796860979287885, "clip_ratio/high_mean": 0.001230579313414637, "clip_ratio/low_mean": 0.0011900466452061664, "clip_ratio/low_min": 0.00010438288518344052, "clip_ratio/region_mean": 0.0024206259913626127, "epoch": 0.07802647160048766, "grad_norm": 0.12293171137571335, "learning_rate": 1e-06, "loss": 0.0091, "step": 836 }, { "clip_ratio/high_max": 0.0025184596815961413, "clip_ratio/high_mean": 0.0010847919293155428, "clip_ratio/low_mean": 0.0010645002948876936, "clip_ratio/low_min": 3.018687311850954e-05, "clip_ratio/region_mean": 0.002149292267858982, "epoch": 0.07811980470048825, "grad_norm": 0.10963098704814911, "learning_rate": 1e-06, "loss": 0.0009, "step": 837 }, { "clip_ratio/high_max": 0.0029446048793033697, "clip_ratio/high_mean": 0.0012566983023134526, "clip_ratio/low_mean": 0.0011154347703268286, "clip_ratio/low_min": 0.00018814946815837175, "clip_ratio/region_mean": 0.0023721331017441116, "epoch": 0.07821313780048883, "grad_norm": 0.10519551485776901, "learning_rate": 1e-06, "loss": -0.0229, "step": 838 }, { "clip_ratio/high_max": 0.002930307964561507, "clip_ratio/high_mean": 0.0012489281725720502, "clip_ratio/low_mean": 0.001091754149456392, "clip_ratio/low_min": 9.750380831974326e-05, "clip_ratio/region_mean": 0.002340682352951262, "epoch": 0.07830647090048942, "grad_norm": 0.1064944714307785, "learning_rate": 1e-06, "loss": -0.0335, "step": 839 }, { "clip_ratio/high_max": 0.0028890523899463005, "clip_ratio/high_mean": 0.0012158993595221546, "clip_ratio/low_mean": 0.0011043915510526858, "clip_ratio/low_min": 5.079952461528592e-05, "clip_ratio/region_mean": 0.0023202909287647344, "epoch": 0.07839980400049, "grad_norm": 0.1067441925406456, "learning_rate": 1e-06, "loss": -0.0023, "step": 840 }, { "clip_ratio/high_max": 0.002793265895888908, "clip_ratio/high_mean": 0.0011566314387891907, "clip_ratio/low_mean": 0.0013418986018223222, "clip_ratio/low_min": 5.787646114185918e-05, "clip_ratio/region_mean": 0.0024985300260595977, "epoch": 0.07849313710049058, "grad_norm": 0.1163916364312172, "learning_rate": 1e-06, "loss": 0.02, "step": 841 }, { "clip_ratio/high_max": 0.0023071855248417705, "clip_ratio/high_mean": 0.0010460580670041963, "clip_ratio/low_mean": 0.0011510213262226898, "clip_ratio/low_min": 0.00017384290185873397, "clip_ratio/region_mean": 0.0021970793895889074, "epoch": 0.07858647020049117, "grad_norm": 0.11197704821825027, "learning_rate": 1e-06, "loss": 0.0124, "step": 842 }, { "clip_ratio/high_max": 0.0026716525098890997, "clip_ratio/high_mean": 0.0011010273428837536, "clip_ratio/low_mean": 0.0012139241334807593, "clip_ratio/low_min": 5.7995189308712725e-05, "clip_ratio/region_mean": 0.002314951503649354, "epoch": 0.07867980330049175, "grad_norm": 0.117385134100914, "learning_rate": 1e-06, "loss": 0.0185, "step": 843 }, { "clip_ratio/high_max": 0.0029825856909155846, "clip_ratio/high_mean": 0.0012078943036613055, "clip_ratio/low_mean": 0.001284895039134426, "clip_ratio/low_min": 7.221832129289396e-05, "clip_ratio/region_mean": 0.002492789259122219, "epoch": 0.07877313640049233, "grad_norm": 0.10704333335161209, "learning_rate": 1e-06, "loss": -0.0033, "step": 844 }, { "clip_ratio/high_max": 0.0028190696612000465, "clip_ratio/high_mean": 0.0011748472570616286, "clip_ratio/low_mean": 0.0011886053762282245, "clip_ratio/low_min": 0.00013773003411188256, "clip_ratio/region_mean": 0.002363452615099959, "epoch": 0.07886646950049292, "grad_norm": 0.11524241417646408, "learning_rate": 1e-06, "loss": 0.0061, "step": 845 }, { "clip_ratio/high_max": 0.0026027618878288195, "clip_ratio/high_mean": 0.0011713986968970858, "clip_ratio/low_mean": 0.0013452168423100375, "clip_ratio/low_min": 0.00020449037401704118, "clip_ratio/region_mean": 0.0025166155173792504, "epoch": 0.0789598026004935, "grad_norm": 0.1149684265255928, "learning_rate": 1e-06, "loss": 0.0434, "step": 846 }, { "clip_ratio/high_max": 0.0027084273388027214, "clip_ratio/high_mean": 0.0012229471067257691, "clip_ratio/low_mean": 0.0012291662824281957, "clip_ratio/low_min": 0.00013775792467640713, "clip_ratio/region_mean": 0.002452113381878007, "epoch": 0.07905313570049408, "grad_norm": 0.11223486810922623, "learning_rate": 1e-06, "loss": 0.0415, "step": 847 }, { "clip_ratio/high_max": 0.0023342169188254047, "clip_ratio/high_mean": 0.00104853719494713, "clip_ratio/low_mean": 0.001133318828578922, "clip_ratio/low_min": 7.183461275417358e-05, "clip_ratio/region_mean": 0.0021818560126121156, "epoch": 0.07914646880049467, "grad_norm": 0.11231688410043716, "learning_rate": 1e-06, "loss": 0.0376, "step": 848 }, { "clip_ratio/high_max": 0.002410042034171056, "clip_ratio/high_mean": 0.0010396162579127122, "clip_ratio/low_mean": 0.0012280759401619434, "clip_ratio/low_min": 2.5278059183619916e-05, "clip_ratio/region_mean": 0.002267692194436677, "epoch": 0.07923980190049525, "grad_norm": 0.10515587776899338, "learning_rate": 1e-06, "loss": 0.0175, "step": 849 }, { "clip_ratio/high_max": 0.0026403689917060547, "clip_ratio/high_mean": 0.00108723109588027, "clip_ratio/low_mean": 0.0011319163877487881, "clip_ratio/low_min": 7.417487086058827e-05, "clip_ratio/region_mean": 0.002219147529103793, "epoch": 0.07933313500049584, "grad_norm": 0.11551614105701447, "learning_rate": 1e-06, "loss": 0.0021, "step": 850 }, { "clip_ratio/high_max": 0.002728172690694919, "clip_ratio/high_mean": 0.0011947342190978816, "clip_ratio/low_mean": 0.0012031684673274867, "clip_ratio/low_min": 7.300599918380613e-05, "clip_ratio/region_mean": 0.0023979027319001034, "epoch": 0.07942646810049642, "grad_norm": 0.11337242275476456, "learning_rate": 1e-06, "loss": 0.0193, "step": 851 }, { "clip_ratio/high_max": 0.0028930796834174544, "clip_ratio/high_mean": 0.0011206011458853027, "clip_ratio/low_mean": 0.0013113266868458595, "clip_ratio/low_min": 0.00011218701638426865, "clip_ratio/region_mean": 0.0024319278090843, "epoch": 0.079519801200497, "grad_norm": 0.1045721247792244, "learning_rate": 1e-06, "loss": 0.0228, "step": 852 }, { "clip_ratio/high_max": 0.0021872713186894543, "clip_ratio/high_mean": 0.000961685353104258, "clip_ratio/low_mean": 0.0012616278145287652, "clip_ratio/low_min": 0.00014936704792489763, "clip_ratio/region_mean": 0.0022233130803215317, "epoch": 0.07961313430049759, "grad_norm": 0.1051497608423233, "learning_rate": 1e-06, "loss": 0.0498, "step": 853 }, { "clip_ratio/high_max": 0.002884042500227224, "clip_ratio/high_mean": 0.0012240650030435063, "clip_ratio/low_mean": 0.001252728863619268, "clip_ratio/low_min": 0.00020089035842829617, "clip_ratio/region_mean": 0.0024767938666627742, "epoch": 0.07970646740049817, "grad_norm": 0.1235123798251152, "learning_rate": 1e-06, "loss": 0.0036, "step": 854 }, { "clip_ratio/high_max": 0.00263397454546066, "clip_ratio/high_mean": 0.0010142055889446056, "clip_ratio/low_mean": 0.0015909727226244286, "clip_ratio/low_min": 0.00019899124799849233, "clip_ratio/region_mean": 0.002605178357043769, "epoch": 0.07979980050049874, "grad_norm": 0.1250627636909485, "learning_rate": 1e-06, "loss": 0.07, "step": 855 }, { "clip_ratio/high_max": 0.002509389945771545, "clip_ratio/high_mean": 0.001222724633407779, "clip_ratio/low_mean": 0.00121611468057381, "clip_ratio/low_min": 5.433012483990751e-05, "clip_ratio/region_mean": 0.002438839292153716, "epoch": 0.07989313360049934, "grad_norm": 0.11074072867631912, "learning_rate": 1e-06, "loss": 0.0144, "step": 856 }, { "clip_ratio/high_max": 0.002395431110926438, "clip_ratio/high_mean": 0.0011117208487121388, "clip_ratio/low_mean": 0.001253226993867429, "clip_ratio/low_min": 0.0001308160190092167, "clip_ratio/region_mean": 0.002364947860769462, "epoch": 0.07998646670049991, "grad_norm": 0.11242078989744186, "learning_rate": 1e-06, "loss": 0.0323, "step": 857 }, { "clip_ratio/high_max": 0.002333924509002827, "clip_ratio/high_mean": 0.0010449303554196376, "clip_ratio/low_mean": 0.0013359494896576507, "clip_ratio/low_min": 0.00010576903787296033, "clip_ratio/region_mean": 0.0023808798359823413, "epoch": 0.08007979980050049, "grad_norm": 0.1026012971997261, "learning_rate": 1e-06, "loss": 0.0254, "step": 858 }, { "clip_ratio/high_max": 0.0029559947943198495, "clip_ratio/high_mean": 0.0011964665682171471, "clip_ratio/low_mean": 0.0013974406283523422, "clip_ratio/low_min": 0.00021447910694405437, "clip_ratio/region_mean": 0.002593907149275765, "epoch": 0.08017313290050108, "grad_norm": 0.11363668739795685, "learning_rate": 1e-06, "loss": 0.0268, "step": 859 }, { "clip_ratio/high_max": 0.0025750106360646896, "clip_ratio/high_mean": 0.0011252224030613434, "clip_ratio/low_mean": 0.0011689675484376494, "clip_ratio/low_min": 8.529292972525582e-05, "clip_ratio/region_mean": 0.0022941899951547384, "epoch": 0.08026646600050166, "grad_norm": 0.11206766963005066, "learning_rate": 1e-06, "loss": 0.0089, "step": 860 }, { "clip_ratio/high_max": 0.0026344709185650572, "clip_ratio/high_mean": 0.0011701535950123798, "clip_ratio/low_mean": 0.001301527696341509, "clip_ratio/low_min": 0.00013713348016608506, "clip_ratio/region_mean": 0.002471681291353889, "epoch": 0.08035979910050225, "grad_norm": 0.6191704273223877, "learning_rate": 1e-06, "loss": 0.0287, "step": 861 }, { "clip_ratio/high_max": 0.002676965232240036, "clip_ratio/high_mean": 0.001188356374768773, "clip_ratio/low_mean": 0.001243033017090056, "clip_ratio/low_min": 8.402799539908301e-05, "clip_ratio/region_mean": 0.0024313893882208504, "epoch": 0.08045313220050283, "grad_norm": 0.11981311440467834, "learning_rate": 1e-06, "loss": 0.0133, "step": 862 }, { "clip_ratio/high_max": 0.0029913152975495905, "clip_ratio/high_mean": 0.001332960480795009, "clip_ratio/low_mean": 0.0013263007094792556, "clip_ratio/low_min": 9.373093053000048e-05, "clip_ratio/region_mean": 0.0026592612848617136, "epoch": 0.08054646530050341, "grad_norm": 0.10774748772382736, "learning_rate": 1e-06, "loss": 0.0051, "step": 863 }, { "clip_ratio/high_max": 0.0022657581212115474, "clip_ratio/high_mean": 0.001019522193018929, "clip_ratio/low_mean": 0.0012398361341183772, "clip_ratio/low_min": 0.00012176808741060086, "clip_ratio/region_mean": 0.0022593583198613487, "epoch": 0.080639798400504, "grad_norm": 0.10538513213396072, "learning_rate": 1e-06, "loss": 0.034, "step": 864 }, { "clip_ratio/high_max": 0.0032340588804800063, "clip_ratio/high_mean": 0.0013334745563042816, "clip_ratio/low_mean": 0.0011402142336010002, "clip_ratio/low_min": 7.988152538018767e-05, "clip_ratio/region_mean": 0.002473688822647091, "epoch": 0.08073313150050458, "grad_norm": 0.12368640303611755, "learning_rate": 1e-06, "loss": -0.0055, "step": 865 }, { "clip_ratio/high_max": 0.0027304650720907375, "clip_ratio/high_mean": 0.0011897902186319698, "clip_ratio/low_mean": 0.001282854387682164, "clip_ratio/low_min": 0.00012809545660275035, "clip_ratio/region_mean": 0.002472644584486261, "epoch": 0.08082646460050516, "grad_norm": 0.11296059191226959, "learning_rate": 1e-06, "loss": 0.0263, "step": 866 }, { "clip_ratio/high_max": 0.0026717867731349543, "clip_ratio/high_mean": 0.001015289382849005, "clip_ratio/low_mean": 0.001372029826598009, "clip_ratio/low_min": 4.58370550404652e-05, "clip_ratio/region_mean": 0.0023873192112660035, "epoch": 0.08091979770050575, "grad_norm": 0.11703436076641083, "learning_rate": 1e-06, "loss": 0.0705, "step": 867 }, { "clip_ratio/high_max": 0.00266984743939247, "clip_ratio/high_mean": 0.001182723219244508, "clip_ratio/low_mean": 0.0013851462026650552, "clip_ratio/low_min": 5.295644950820133e-05, "clip_ratio/region_mean": 0.002567869334598072, "epoch": 0.08101313080050633, "grad_norm": 0.120257169008255, "learning_rate": 1e-06, "loss": 0.0198, "step": 868 }, { "clip_ratio/high_max": 0.0026716306529124267, "clip_ratio/high_mean": 0.0012233324996486772, "clip_ratio/low_mean": 0.0012099921932531288, "clip_ratio/low_min": 9.078643597604241e-05, "clip_ratio/region_mean": 0.002433324720186647, "epoch": 0.08110646390050691, "grad_norm": 0.11764660477638245, "learning_rate": 1e-06, "loss": -0.017, "step": 869 }, { "clip_ratio/high_max": 0.0030705239478265867, "clip_ratio/high_mean": 0.0012332064143265598, "clip_ratio/low_mean": 0.0014900982932886109, "clip_ratio/low_min": 0.00011409409489715472, "clip_ratio/region_mean": 0.0027233046930632554, "epoch": 0.0811997970005075, "grad_norm": 0.11546960473060608, "learning_rate": 1e-06, "loss": 0.0333, "step": 870 }, { "clip_ratio/high_max": 0.003083862153289374, "clip_ratio/high_mean": 0.0012245197758602444, "clip_ratio/low_mean": 0.0012763446320604999, "clip_ratio/low_min": 0.00011840130991913611, "clip_ratio/region_mean": 0.002500864415196702, "epoch": 0.08129313010050808, "grad_norm": 0.12118712067604065, "learning_rate": 1e-06, "loss": -0.0064, "step": 871 }, { "clip_ratio/high_max": 0.0029794194852001965, "clip_ratio/high_mean": 0.0013708833139389753, "clip_ratio/low_mean": 0.0014867627505736891, "clip_ratio/low_min": 0.0001623352109163534, "clip_ratio/region_mean": 0.002857646053598728, "epoch": 0.08138646320050867, "grad_norm": 0.1265120506286621, "learning_rate": 1e-06, "loss": -0.0439, "step": 872 }, { "clip_ratio/high_max": 0.002703121557715349, "clip_ratio/high_mean": 0.0011338754047756083, "clip_ratio/low_mean": 0.0016530198154214304, "clip_ratio/low_min": 0.00010148712135560345, "clip_ratio/region_mean": 0.0027868952238350175, "epoch": 0.08147979630050925, "grad_norm": 0.10864236950874329, "learning_rate": 1e-06, "loss": 0.0292, "step": 873 }, { "clip_ratio/high_max": 0.0025929372532118578, "clip_ratio/high_mean": 0.0010713470146583859, "clip_ratio/low_mean": 0.00130601479759207, "clip_ratio/low_min": 4.2274246879969724e-05, "clip_ratio/region_mean": 0.0023773618013365194, "epoch": 0.08157312940050983, "grad_norm": 0.10903564840555191, "learning_rate": 1e-06, "loss": 0.0344, "step": 874 }, { "clip_ratio/high_max": 0.00267447571241064, "clip_ratio/high_mean": 0.0011267735535511747, "clip_ratio/low_mean": 0.001447578670195071, "clip_ratio/low_min": 0.000233820806897711, "clip_ratio/region_mean": 0.002574352256488055, "epoch": 0.08166646250051042, "grad_norm": 0.10689591616392136, "learning_rate": 1e-06, "loss": 0.0339, "step": 875 }, { "clip_ratio/high_max": 0.002816759282723069, "clip_ratio/high_mean": 0.0013318099736352451, "clip_ratio/low_mean": 0.0014332242935779504, "clip_ratio/low_min": 0.00016015996880014427, "clip_ratio/region_mean": 0.002765034296317026, "epoch": 0.081759795600511, "grad_norm": 0.15753105282783508, "learning_rate": 1e-06, "loss": 0.0182, "step": 876 }, { "clip_ratio/high_max": 0.0028222528599144425, "clip_ratio/high_mean": 0.0011794420606747735, "clip_ratio/low_mean": 0.0014150172319205012, "clip_ratio/low_min": 7.90039212006377e-06, "clip_ratio/region_mean": 0.0025944592998712324, "epoch": 0.08185312870051158, "grad_norm": 0.12091733515262604, "learning_rate": 1e-06, "loss": 0.0294, "step": 877 }, { "clip_ratio/high_max": 0.0026184621565334965, "clip_ratio/high_mean": 0.0012445320462575182, "clip_ratio/low_mean": 0.001264230773813324, "clip_ratio/low_min": 5.083018641016679e-05, "clip_ratio/region_mean": 0.002508762845536694, "epoch": 0.08194646180051217, "grad_norm": 0.10422682017087936, "learning_rate": 1e-06, "loss": 0.0231, "step": 878 }, { "clip_ratio/high_max": 0.002115822055202443, "clip_ratio/high_mean": 0.0009537266942061251, "clip_ratio/low_mean": 0.0016013810054573696, "clip_ratio/low_min": 0.00017287244736508, "clip_ratio/region_mean": 0.0025551076614647172, "epoch": 0.08203979490051275, "grad_norm": 0.10884254425764084, "learning_rate": 1e-06, "loss": 0.0533, "step": 879 }, { "clip_ratio/high_max": 0.0025673884301795624, "clip_ratio/high_mean": 0.0010874926865653833, "clip_ratio/low_mean": 0.0013618801931443159, "clip_ratio/low_min": 0.00015549774252576753, "clip_ratio/region_mean": 0.002449372928822413, "epoch": 0.08213312800051333, "grad_norm": 0.11862734705209732, "learning_rate": 1e-06, "loss": 0.0801, "step": 880 }, { "clip_ratio/high_max": 0.0028271170594962314, "clip_ratio/high_mean": 0.0011623361006058985, "clip_ratio/low_mean": 0.0014281211742854794, "clip_ratio/low_min": 0.00022740460372006055, "clip_ratio/region_mean": 0.002590457246697042, "epoch": 0.08222646110051392, "grad_norm": 0.11558318138122559, "learning_rate": 1e-06, "loss": 0.0596, "step": 881 }, { "clip_ratio/high_max": 0.00272104359100922, "clip_ratio/high_mean": 0.001244183591552428, "clip_ratio/low_mean": 0.0013736167566094082, "clip_ratio/low_min": 0.00022559764056495624, "clip_ratio/region_mean": 0.0026178003463428468, "epoch": 0.0823197942005145, "grad_norm": 0.11596973985433578, "learning_rate": 1e-06, "loss": 0.0514, "step": 882 }, { "clip_ratio/high_max": 0.0030806600334472023, "clip_ratio/high_mean": 0.0011541204803506844, "clip_ratio/low_mean": 0.0013255390513222665, "clip_ratio/low_min": 0.00014947133058740292, "clip_ratio/region_mean": 0.0024796595607767813, "epoch": 0.08241312730051509, "grad_norm": 0.1079336479306221, "learning_rate": 1e-06, "loss": 0.041, "step": 883 }, { "clip_ratio/high_max": 0.002756727328232955, "clip_ratio/high_mean": 0.0012181342699477682, "clip_ratio/low_mean": 0.0014362711772264447, "clip_ratio/low_min": 0.00019357741985004395, "clip_ratio/region_mean": 0.0026544054780970328, "epoch": 0.08250646040051567, "grad_norm": 0.10953733325004578, "learning_rate": 1e-06, "loss": 0.0351, "step": 884 }, { "clip_ratio/high_max": 0.002775097222183831, "clip_ratio/high_mean": 0.001262925346964039, "clip_ratio/low_mean": 0.00116158917080611, "clip_ratio/low_min": 7.620512496941956e-05, "clip_ratio/region_mean": 0.0024245145032182336, "epoch": 0.08259979350051624, "grad_norm": 0.11950094997882843, "learning_rate": 1e-06, "loss": 0.0049, "step": 885 }, { "clip_ratio/high_max": 0.002757563030172605, "clip_ratio/high_mean": 0.0011204800066479947, "clip_ratio/low_mean": 0.0012934416954522021, "clip_ratio/low_min": 0.00011284019365120912, "clip_ratio/region_mean": 0.002413921771221794, "epoch": 0.08269312660051684, "grad_norm": 0.10900633037090302, "learning_rate": 1e-06, "loss": -0.0058, "step": 886 }, { "clip_ratio/high_max": 0.0028210538839630317, "clip_ratio/high_mean": 0.001050157366989879, "clip_ratio/low_mean": 0.0016014536304282956, "clip_ratio/low_min": 0.0002004827492783079, "clip_ratio/region_mean": 0.0026516110519878566, "epoch": 0.08278645970051741, "grad_norm": 0.11915874481201172, "learning_rate": 1e-06, "loss": 0.0302, "step": 887 }, { "clip_ratio/high_max": 0.002610648218251299, "clip_ratio/high_mean": 0.0011100116025772877, "clip_ratio/low_mean": 0.001413670281181112, "clip_ratio/low_min": 0.00024200794359785505, "clip_ratio/region_mean": 0.0025236818837583996, "epoch": 0.08287979280051799, "grad_norm": 0.11571001261472702, "learning_rate": 1e-06, "loss": 0.0461, "step": 888 }, { "clip_ratio/high_max": 0.0025575663821655326, "clip_ratio/high_mean": 0.0011261267063673586, "clip_ratio/low_mean": 0.0012038480526825879, "clip_ratio/low_min": 0.00016985584079520777, "clip_ratio/region_mean": 0.002329974711756222, "epoch": 0.08297312590051859, "grad_norm": 0.1134910061955452, "learning_rate": 1e-06, "loss": 0.0107, "step": 889 }, { "clip_ratio/high_max": 0.002807971635775175, "clip_ratio/high_mean": 0.0011861139719258063, "clip_ratio/low_mean": 0.0013165185700927395, "clip_ratio/low_min": 0.00015775557494634995, "clip_ratio/region_mean": 0.0025026325820363127, "epoch": 0.08306645900051916, "grad_norm": 0.10823081433773041, "learning_rate": 1e-06, "loss": 0.0217, "step": 890 }, { "clip_ratio/high_max": 0.0026585467348922975, "clip_ratio/high_mean": 0.0011474129896669183, "clip_ratio/low_mean": 0.0014322423594421707, "clip_ratio/low_min": 0.00011148946487082867, "clip_ratio/region_mean": 0.0025796553745749407, "epoch": 0.08315979210051976, "grad_norm": 0.11732596904039383, "learning_rate": 1e-06, "loss": 0.0376, "step": 891 }, { "clip_ratio/high_max": 0.002762632444500923, "clip_ratio/high_mean": 0.0012664586683968082, "clip_ratio/low_mean": 0.0013926984611316584, "clip_ratio/low_min": 7.680372254981194e-05, "clip_ratio/region_mean": 0.002659157144080382, "epoch": 0.08325312520052033, "grad_norm": 0.10904115438461304, "learning_rate": 1e-06, "loss": 0.0139, "step": 892 }, { "clip_ratio/high_max": 0.0030935159302316606, "clip_ratio/high_mean": 0.0012770246357831638, "clip_ratio/low_mean": 0.001306820984609658, "clip_ratio/low_min": 5.368848542275373e-05, "clip_ratio/region_mean": 0.0025838457222562283, "epoch": 0.08334645830052091, "grad_norm": 0.1260535717010498, "learning_rate": 1e-06, "loss": 0.0389, "step": 893 }, { "clip_ratio/high_max": 0.0028869533707620576, "clip_ratio/high_mean": 0.0012445216380001511, "clip_ratio/low_mean": 0.001347474528301973, "clip_ratio/low_min": 0.00010394027776783332, "clip_ratio/region_mean": 0.002591996220871806, "epoch": 0.0834397914005215, "grad_norm": 0.11635053157806396, "learning_rate": 1e-06, "loss": 0.0175, "step": 894 }, { "clip_ratio/high_max": 0.0027193515707040206, "clip_ratio/high_mean": 0.0012923296562803444, "clip_ratio/low_mean": 0.0013158954025129788, "clip_ratio/low_min": 0.00013268914153741207, "clip_ratio/region_mean": 0.002608225062431302, "epoch": 0.08353312450052208, "grad_norm": 0.12040353566408157, "learning_rate": 1e-06, "loss": -0.0061, "step": 895 }, { "clip_ratio/high_max": 0.0025315207603853196, "clip_ratio/high_mean": 0.001118036514526466, "clip_ratio/low_mean": 0.0014116798229224514, "clip_ratio/low_min": 8.406334018218331e-05, "clip_ratio/region_mean": 0.0025297163956565782, "epoch": 0.08362645760052266, "grad_norm": 0.4274236559867859, "learning_rate": 1e-06, "loss": 0.0508, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010332380022321397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 604.8611450195312, "completions/mean_terminated_length": 568.4127197265625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.08371979070052325, "grad_norm": 0.13180045783519745, "learning_rate": 1e-06, "loss": 0.0251, "num_tokens": 647356960.0, "reward": 0.6045271158218384, "reward_std": 0.19037386775016785, "rewards/simpleverify_reward/mean": 0.6045270562171936, "rewards/simpleverify_reward/std": 0.4889540672302246, "step": 897 }, { "clip_ratio/high_max": 0.0020903320437355433, "clip_ratio/high_mean": 0.000811422971310094, "clip_ratio/low_mean": 0.0004955820868417504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013070050299575087, "epoch": 0.08381312380052383, "grad_norm": 0.12040230631828308, "learning_rate": 1e-06, "loss": 0.0368, "step": 898 }, { "clip_ratio/high_max": 0.0019020930922124535, "clip_ratio/high_mean": 0.0008108365400403272, "clip_ratio/low_mean": 0.0005846929034305504, "clip_ratio/low_min": 2.8351099899737164e-05, "clip_ratio/region_mean": 0.0013955294416518882, "epoch": 0.08390645690052441, "grad_norm": 0.11102444678544998, "learning_rate": 1e-06, "loss": 0.0132, "step": 899 }, { "clip_ratio/high_max": 0.0021109771987539716, "clip_ratio/high_mean": 0.0009411347873538034, "clip_ratio/low_mean": 0.0005980320720482268, "clip_ratio/low_min": 3.838850534521043e-05, "clip_ratio/region_mean": 0.0015391668493975885, "epoch": 0.083999790000525, "grad_norm": 0.13241131603717804, "learning_rate": 1e-06, "loss": -0.0254, "step": 900 }, { "clip_ratio/high_max": 0.001951029320480302, "clip_ratio/high_mean": 0.0008549535177735379, "clip_ratio/low_mean": 0.0006839488560217433, "clip_ratio/low_min": 8.409043948631734e-05, "clip_ratio/region_mean": 0.0015389024010801222, "epoch": 0.08409312310052558, "grad_norm": 0.10430038720369339, "learning_rate": 1e-06, "loss": 0.0262, "step": 901 }, { "clip_ratio/high_max": 0.0017992737630265765, "clip_ratio/high_mean": 0.000774283518694574, "clip_ratio/low_mean": 0.0007132975924832863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014875811466481537, "epoch": 0.08418645620052617, "grad_norm": 0.5818588137626648, "learning_rate": 1e-06, "loss": 0.0309, "step": 902 }, { "clip_ratio/high_max": 0.0021047825066489168, "clip_ratio/high_mean": 0.0008510122970619705, "clip_ratio/low_mean": 0.0008836374217935372, "clip_ratio/low_min": 1.541307028674055e-05, "clip_ratio/region_mean": 0.0017346497188555077, "epoch": 0.08427978930052675, "grad_norm": 0.1185864806175232, "learning_rate": 1e-06, "loss": 0.053, "step": 903 }, { "clip_ratio/high_max": 0.0022040233852749225, "clip_ratio/high_mean": 0.0008466147646686295, "clip_ratio/low_mean": 0.0008286899992526742, "clip_ratio/low_min": 5.653566404362209e-05, "clip_ratio/region_mean": 0.0016753047239035368, "epoch": 0.08437312240052733, "grad_norm": 0.12117908149957657, "learning_rate": 1e-06, "loss": 0.0533, "step": 904 }, { "clip_ratio/high_max": 0.0018884313103626482, "clip_ratio/high_mean": 0.000758550286263926, "clip_ratio/low_mean": 0.0008438970198767493, "clip_ratio/low_min": 0.0001521305075584678, "clip_ratio/region_mean": 0.0016024473079596646, "epoch": 0.08446645550052792, "grad_norm": 0.10082881152629852, "learning_rate": 1e-06, "loss": 0.0428, "step": 905 }, { "clip_ratio/high_max": 0.002562358098657569, "clip_ratio/high_mean": 0.0010030534522229573, "clip_ratio/low_mean": 0.0009143080096691847, "clip_ratio/low_min": 6.863703765702667e-05, "clip_ratio/region_mean": 0.0019173614491592161, "epoch": 0.0845597886005285, "grad_norm": 0.12264148890972137, "learning_rate": 1e-06, "loss": 0.0363, "step": 906 }, { "clip_ratio/high_max": 0.002145728485629661, "clip_ratio/high_mean": 0.0009420317655894905, "clip_ratio/low_mean": 0.0008138321354635991, "clip_ratio/low_min": 1.2980270184925757e-05, "clip_ratio/region_mean": 0.0017558639010530896, "epoch": 0.08465312170052908, "grad_norm": 0.11554498225450516, "learning_rate": 1e-06, "loss": 0.0079, "step": 907 }, { "clip_ratio/high_max": 0.0022196019417606294, "clip_ratio/high_mean": 0.0010051094632217428, "clip_ratio/low_mean": 0.0009380039646202931, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019431133841862902, "epoch": 0.08474645480052967, "grad_norm": 0.12976033985614777, "learning_rate": 1e-06, "loss": 0.0387, "step": 908 }, { "clip_ratio/high_max": 0.0023158696276368573, "clip_ratio/high_mean": 0.0010266714125464205, "clip_ratio/low_mean": 0.0009847607452684315, "clip_ratio/low_min": 8.343649278685916e-05, "clip_ratio/region_mean": 0.0020114321014261805, "epoch": 0.08483978790053025, "grad_norm": 0.11366299539804459, "learning_rate": 1e-06, "loss": 0.0467, "step": 909 }, { "clip_ratio/high_max": 0.0024726804476813413, "clip_ratio/high_mean": 0.001053126548868022, "clip_ratio/low_mean": 0.0009124658427026588, "clip_ratio/low_min": 0.00011002197788911872, "clip_ratio/region_mean": 0.0019655923533719033, "epoch": 0.08493312100053083, "grad_norm": 0.1178712546825409, "learning_rate": 1e-06, "loss": 0.0373, "step": 910 }, { "clip_ratio/high_max": 0.002646814682520926, "clip_ratio/high_mean": 0.0010974636225000722, "clip_ratio/low_mean": 0.0009178663640341256, "clip_ratio/low_min": 7.914322213764535e-05, "clip_ratio/region_mean": 0.0020153299628873356, "epoch": 0.08502645410053142, "grad_norm": 0.13061457872390747, "learning_rate": 1e-06, "loss": -0.0102, "step": 911 }, { "clip_ratio/high_max": 0.0020276003779144958, "clip_ratio/high_mean": 0.0009705177908472251, "clip_ratio/low_mean": 0.0008762455418036552, "clip_ratio/low_min": 6.748240321030607e-05, "clip_ratio/region_mean": 0.0018467633708496578, "epoch": 0.085119787200532, "grad_norm": 0.3669838011264801, "learning_rate": 1e-06, "loss": 0.0128, "step": 912 }, { "clip_ratio/high_max": 0.0026896395211224444, "clip_ratio/high_mean": 0.0011159031182614854, "clip_ratio/low_mean": 0.0010102335581905209, "clip_ratio/low_min": 9.22293202165747e-05, "clip_ratio/region_mean": 0.0021261366200633347, "epoch": 0.08521312030053259, "grad_norm": 0.11618983000516891, "learning_rate": 1e-06, "loss": 0.0238, "step": 913 }, { "clip_ratio/high_max": 0.002566167931945529, "clip_ratio/high_mean": 0.0010500563876121305, "clip_ratio/low_mean": 0.0010380182466178667, "clip_ratio/low_min": 0.0001485766224504914, "clip_ratio/region_mean": 0.0020880746305920184, "epoch": 0.08530645340053317, "grad_norm": 0.12886705994606018, "learning_rate": 1e-06, "loss": -0.0104, "step": 914 }, { "clip_ratio/high_max": 0.002359906822675839, "clip_ratio/high_mean": 0.000975397793808952, "clip_ratio/low_mean": 0.0010993423056788743, "clip_ratio/low_min": 0.0001564110552862985, "clip_ratio/region_mean": 0.0020747400922118686, "epoch": 0.08539978650053374, "grad_norm": 0.11995469033718109, "learning_rate": 1e-06, "loss": 0.0523, "step": 915 }, { "clip_ratio/high_max": 0.0026835996759473346, "clip_ratio/high_mean": 0.0011424657695897622, "clip_ratio/low_mean": 0.0011959493770064, "clip_ratio/low_min": 0.00016448516544187441, "clip_ratio/region_mean": 0.0023384150917991064, "epoch": 0.08549311960053434, "grad_norm": 0.11073128879070282, "learning_rate": 1e-06, "loss": 0.0087, "step": 916 }, { "clip_ratio/high_max": 0.0022553076269105077, "clip_ratio/high_mean": 0.0009291547939938027, "clip_ratio/low_mean": 0.0012759980709233787, "clip_ratio/low_min": 0.00013660632794199046, "clip_ratio/region_mean": 0.002205152908572927, "epoch": 0.08558645270053492, "grad_norm": 0.1244710385799408, "learning_rate": 1e-06, "loss": 0.0431, "step": 917 }, { "clip_ratio/high_max": 0.0025015782011905685, "clip_ratio/high_mean": 0.0010640408781910082, "clip_ratio/low_mean": 0.001145112793892622, "clip_ratio/low_min": 1.7250897144549526e-05, "clip_ratio/region_mean": 0.00220915370300645, "epoch": 0.0856797858005355, "grad_norm": 0.12307003885507584, "learning_rate": 1e-06, "loss": 0.0071, "step": 918 }, { "clip_ratio/high_max": 0.0024824382344377227, "clip_ratio/high_mean": 0.0010619299973768648, "clip_ratio/low_mean": 0.001083579430996906, "clip_ratio/low_min": 3.900230694853235e-05, "clip_ratio/region_mean": 0.00214550946111558, "epoch": 0.08577311890053609, "grad_norm": 0.1148950457572937, "learning_rate": 1e-06, "loss": 0.0205, "step": 919 }, { "clip_ratio/high_max": 0.0025234888889826834, "clip_ratio/high_mean": 0.0010395415738457814, "clip_ratio/low_mean": 0.0011789917807618622, "clip_ratio/low_min": 4.821899619855685e-05, "clip_ratio/region_mean": 0.0022185333364177495, "epoch": 0.08586645200053666, "grad_norm": 0.1140666976571083, "learning_rate": 1e-06, "loss": 0.0252, "step": 920 }, { "clip_ratio/high_max": 0.002604686866106931, "clip_ratio/high_mean": 0.0012305085583648179, "clip_ratio/low_mean": 0.0011976935456914362, "clip_ratio/low_min": 7.791272673784988e-05, "clip_ratio/region_mean": 0.002428202162263915, "epoch": 0.08595978510053724, "grad_norm": 0.2378448247909546, "learning_rate": 1e-06, "loss": 0.0076, "step": 921 }, { "clip_ratio/high_max": 0.0022589188847632613, "clip_ratio/high_mean": 0.0009832003379415255, "clip_ratio/low_mean": 0.0010548079371801578, "clip_ratio/low_min": 7.642799937457312e-05, "clip_ratio/region_mean": 0.002038008278759662, "epoch": 0.08605311820053783, "grad_norm": 0.11038652807474136, "learning_rate": 1e-06, "loss": 0.0033, "step": 922 }, { "clip_ratio/high_max": 0.0029273399195517413, "clip_ratio/high_mean": 0.0012167400163889397, "clip_ratio/low_mean": 0.0011397859634598717, "clip_ratio/low_min": 6.687169388897019e-05, "clip_ratio/region_mean": 0.0023565260125906207, "epoch": 0.08614645130053841, "grad_norm": 0.10846617072820663, "learning_rate": 1e-06, "loss": 0.0068, "step": 923 }, { "clip_ratio/high_max": 0.0026016525152954273, "clip_ratio/high_mean": 0.0010866862867260352, "clip_ratio/low_mean": 0.0010949773532047402, "clip_ratio/low_min": 8.901059482013807e-05, "clip_ratio/region_mean": 0.002181663665396627, "epoch": 0.086239784400539, "grad_norm": 0.1089077815413475, "learning_rate": 1e-06, "loss": 0.0263, "step": 924 }, { "clip_ratio/high_max": 0.0021645506858476438, "clip_ratio/high_mean": 0.0010588792829366866, "clip_ratio/low_mean": 0.0011073275891249068, "clip_ratio/low_min": 5.3161484174779616e-05, "clip_ratio/region_mean": 0.0021662068756995723, "epoch": 0.08633311750053958, "grad_norm": 0.11644066870212555, "learning_rate": 1e-06, "loss": 0.0053, "step": 925 }, { "clip_ratio/high_max": 0.0024562943799537607, "clip_ratio/high_mean": 0.0011145613152621081, "clip_ratio/low_mean": 0.001179668674012646, "clip_ratio/low_min": 7.074141831253655e-05, "clip_ratio/region_mean": 0.002294229998369701, "epoch": 0.08642645060054016, "grad_norm": 0.12333524972200394, "learning_rate": 1e-06, "loss": -0.0313, "step": 926 }, { "clip_ratio/high_max": 0.002317774858966004, "clip_ratio/high_mean": 0.0011376612710591871, "clip_ratio/low_mean": 0.001218876896018628, "clip_ratio/low_min": 0.00014656296480097808, "clip_ratio/region_mean": 0.0023565382143715397, "epoch": 0.08651978370054075, "grad_norm": 0.11655978113412857, "learning_rate": 1e-06, "loss": -0.0032, "step": 927 }, { "clip_ratio/high_max": 0.0026441082591190934, "clip_ratio/high_mean": 0.001151671924162656, "clip_ratio/low_mean": 0.0013011670707783196, "clip_ratio/low_min": 8.304755101562478e-05, "clip_ratio/region_mean": 0.0024528390204068273, "epoch": 0.08661311680054133, "grad_norm": 0.12562242150306702, "learning_rate": 1e-06, "loss": 0.0219, "step": 928 }, { "clip_ratio/high_max": 0.00245697409263812, "clip_ratio/high_mean": 0.0010541370629653102, "clip_ratio/low_mean": 0.0013386718419496901, "clip_ratio/low_min": 0.00012002697440038901, "clip_ratio/region_mean": 0.0023928089140099473, "epoch": 0.08670644990054191, "grad_norm": 0.11794578284025192, "learning_rate": 1e-06, "loss": 0.0223, "step": 929 }, { "clip_ratio/high_max": 0.0023919832274259534, "clip_ratio/high_mean": 0.0010860691600100836, "clip_ratio/low_mean": 0.0013822069777233992, "clip_ratio/low_min": 9.76255096247769e-05, "clip_ratio/region_mean": 0.002468276121362578, "epoch": 0.0867997830005425, "grad_norm": 0.12333954870700836, "learning_rate": 1e-06, "loss": 0.0072, "step": 930 }, { "clip_ratio/high_max": 0.0025176404451485723, "clip_ratio/high_mean": 0.0010153427028853912, "clip_ratio/low_mean": 0.0013434517677524127, "clip_ratio/low_min": 8.390972107008565e-05, "clip_ratio/region_mean": 0.002358794503379613, "epoch": 0.08689311610054308, "grad_norm": 0.11604081094264984, "learning_rate": 1e-06, "loss": 0.0075, "step": 931 }, { "clip_ratio/high_max": 0.0021745060694229323, "clip_ratio/high_mean": 0.0009546953333483543, "clip_ratio/low_mean": 0.0015578732745780144, "clip_ratio/low_min": 0.0002290548136443249, "clip_ratio/region_mean": 0.0025125685933744535, "epoch": 0.08698644920054366, "grad_norm": 0.10966379195451736, "learning_rate": 1e-06, "loss": 0.0601, "step": 932 }, { "clip_ratio/high_max": 0.002769292696029879, "clip_ratio/high_mean": 0.001277127204957651, "clip_ratio/low_mean": 0.001254626158697647, "clip_ratio/low_min": 4.278972846805118e-05, "clip_ratio/region_mean": 0.0025317533873021603, "epoch": 0.08707978230054425, "grad_norm": 0.12039266526699066, "learning_rate": 1e-06, "loss": -0.0555, "step": 933 }, { "clip_ratio/high_max": 0.0028365253820084035, "clip_ratio/high_mean": 0.001245771494723158, "clip_ratio/low_mean": 0.0014007402023707982, "clip_ratio/low_min": 0.0001116386611101916, "clip_ratio/region_mean": 0.0026465117989573628, "epoch": 0.08717311540054483, "grad_norm": 0.12246831506490707, "learning_rate": 1e-06, "loss": 0.0121, "step": 934 }, { "clip_ratio/high_max": 0.002520124297006987, "clip_ratio/high_mean": 0.001015829599054996, "clip_ratio/low_mean": 0.0011046435474781902, "clip_ratio/low_min": 2.8745544113917276e-05, "clip_ratio/region_mean": 0.002120473094691988, "epoch": 0.08726644850054542, "grad_norm": 0.11671555042266846, "learning_rate": 1e-06, "loss": 0.0106, "step": 935 }, { "clip_ratio/high_max": 0.0024313078829436563, "clip_ratio/high_mean": 0.0010604682775010588, "clip_ratio/low_mean": 0.0012984707136638463, "clip_ratio/low_min": 0.00015652061847504228, "clip_ratio/region_mean": 0.0023589390111737885, "epoch": 0.087359781600546, "grad_norm": 0.11363453418016434, "learning_rate": 1e-06, "loss": 0.0391, "step": 936 }, { "clip_ratio/high_max": 0.0022487197420559824, "clip_ratio/high_mean": 0.0009560376274748705, "clip_ratio/low_mean": 0.0013037988319410942, "clip_ratio/low_min": 0.00011548104339453857, "clip_ratio/region_mean": 0.0022598364885197952, "epoch": 0.08745311470054658, "grad_norm": 0.11499801278114319, "learning_rate": 1e-06, "loss": 0.043, "step": 937 }, { "clip_ratio/high_max": 0.0026057502400362864, "clip_ratio/high_mean": 0.0011198134689038852, "clip_ratio/low_mean": 0.0012438147350621875, "clip_ratio/low_min": 0.0001233796137967147, "clip_ratio/region_mean": 0.002363628249440808, "epoch": 0.08754644780054717, "grad_norm": 0.1672276109457016, "learning_rate": 1e-06, "loss": 0.0293, "step": 938 }, { "clip_ratio/high_max": 0.00285910876118578, "clip_ratio/high_mean": 0.0010590961210255045, "clip_ratio/low_mean": 0.0013069356718915515, "clip_ratio/low_min": 0.0002795633099594852, "clip_ratio/region_mean": 0.0023660318038309924, "epoch": 0.08763978090054775, "grad_norm": 0.11041127890348434, "learning_rate": 1e-06, "loss": 0.0173, "step": 939 }, { "clip_ratio/high_max": 0.0026718849330791272, "clip_ratio/high_mean": 0.001027100719511509, "clip_ratio/low_mean": 0.0012909763208881486, "clip_ratio/low_min": 9.275022330257343e-05, "clip_ratio/region_mean": 0.002318077051313594, "epoch": 0.08773311400054833, "grad_norm": 0.11999907344579697, "learning_rate": 1e-06, "loss": 0.0269, "step": 940 }, { "clip_ratio/high_max": 0.002634908159961924, "clip_ratio/high_mean": 0.0011355004207871389, "clip_ratio/low_mean": 0.0012214113703521434, "clip_ratio/low_min": 0.00018193683627032442, "clip_ratio/region_mean": 0.0023569117693114094, "epoch": 0.08782644710054892, "grad_norm": 0.2274075597524643, "learning_rate": 1e-06, "loss": 0.0457, "step": 941 }, { "clip_ratio/high_max": 0.002534883147745859, "clip_ratio/high_mean": 0.0012450102694856469, "clip_ratio/low_mean": 0.0011261576237302506, "clip_ratio/low_min": 5.616821727016941e-05, "clip_ratio/region_mean": 0.002371167851379141, "epoch": 0.0879197802005495, "grad_norm": 0.1208011582493782, "learning_rate": 1e-06, "loss": -0.0252, "step": 942 }, { "clip_ratio/high_max": 0.002788424084428698, "clip_ratio/high_mean": 0.0011727850323950406, "clip_ratio/low_mean": 0.001143006174970651, "clip_ratio/low_min": 4.4686891669698525e-05, "clip_ratio/region_mean": 0.0023157911928137764, "epoch": 0.08801311330055009, "grad_norm": 0.117280974984169, "learning_rate": 1e-06, "loss": -0.0059, "step": 943 }, { "clip_ratio/high_max": 0.0025379768921993673, "clip_ratio/high_mean": 0.001176511068479158, "clip_ratio/low_mean": 0.0014114357654761989, "clip_ratio/low_min": 0.0001698935620879638, "clip_ratio/region_mean": 0.0025879468230414204, "epoch": 0.08810644640055067, "grad_norm": 0.11293445527553558, "learning_rate": 1e-06, "loss": 0.0408, "step": 944 }, { "clip_ratio/high_max": 0.002642636922246311, "clip_ratio/high_mean": 0.0011102025346190203, "clip_ratio/low_mean": 0.0013499246779247187, "clip_ratio/low_min": 4.9862807827594224e-05, "clip_ratio/region_mean": 0.002460127157974057, "epoch": 0.08819977950055125, "grad_norm": 0.111526720225811, "learning_rate": 1e-06, "loss": -0.0146, "step": 945 }, { "clip_ratio/high_max": 0.0025249352293030825, "clip_ratio/high_mean": 0.0011404987053538207, "clip_ratio/low_mean": 0.0013171319587854668, "clip_ratio/low_min": 0.00013225787915871479, "clip_ratio/region_mean": 0.0024576306823291816, "epoch": 0.08829311260055184, "grad_norm": 0.12684796750545502, "learning_rate": 1e-06, "loss": 0.0618, "step": 946 }, { "clip_ratio/high_max": 0.0027552930696401745, "clip_ratio/high_mean": 0.0011042953919968568, "clip_ratio/low_mean": 0.0013201149085944053, "clip_ratio/low_min": 7.282524893525988e-05, "clip_ratio/region_mean": 0.0024244102896773256, "epoch": 0.08838644570055242, "grad_norm": 0.16028104722499847, "learning_rate": 1e-06, "loss": 0.0161, "step": 947 }, { "clip_ratio/high_max": 0.0028171928133815527, "clip_ratio/high_mean": 0.0011875251211677096, "clip_ratio/low_mean": 0.0014720461149408948, "clip_ratio/low_min": 0.0001122882540585124, "clip_ratio/region_mean": 0.0026595712333801202, "epoch": 0.088479778800553, "grad_norm": 0.10995173454284668, "learning_rate": 1e-06, "loss": 0.0237, "step": 948 }, { "clip_ratio/high_max": 0.0026523575506871566, "clip_ratio/high_mean": 0.0011990972016064916, "clip_ratio/low_mean": 0.001458189912227681, "clip_ratio/low_min": 0.0001184472203021869, "clip_ratio/region_mean": 0.002657287062902469, "epoch": 0.08857311190055359, "grad_norm": 0.12484999001026154, "learning_rate": 1e-06, "loss": 0.0599, "step": 949 }, { "clip_ratio/high_max": 0.001983057001780253, "clip_ratio/high_mean": 0.0009208516785292886, "clip_ratio/low_mean": 0.001373267423332436, "clip_ratio/low_min": 0.00018387226828053826, "clip_ratio/region_mean": 0.002294119054568, "epoch": 0.08866644500055416, "grad_norm": 0.1285124272108078, "learning_rate": 1e-06, "loss": 0.0732, "step": 950 }, { "clip_ratio/high_max": 0.003028925355465617, "clip_ratio/high_mean": 0.001133382087573409, "clip_ratio/low_mean": 0.0012056826199113857, "clip_ratio/low_min": 0.00014831778753432445, "clip_ratio/region_mean": 0.0023390646747429855, "epoch": 0.08875977810055474, "grad_norm": 0.12723256647586823, "learning_rate": 1e-06, "loss": 0.012, "step": 951 }, { "clip_ratio/high_max": 0.002673895469342824, "clip_ratio/high_mean": 0.0011443130606494378, "clip_ratio/low_mean": 0.0013673353532794863, "clip_ratio/low_min": 0.00014478527464234503, "clip_ratio/region_mean": 0.0025116483884630725, "epoch": 0.08885311120055533, "grad_norm": 0.11702253669500351, "learning_rate": 1e-06, "loss": 0.0214, "step": 952 }, { "clip_ratio/high_max": 0.0028391328232828528, "clip_ratio/high_mean": 0.0012477754789870232, "clip_ratio/low_mean": 0.0014510735854855739, "clip_ratio/low_min": 8.068501119851135e-05, "clip_ratio/region_mean": 0.002698849064472597, "epoch": 0.08894644430055591, "grad_norm": 0.1425998955965042, "learning_rate": 1e-06, "loss": 0.0343, "step": 953 }, { "clip_ratio/high_max": 0.002632967552926857, "clip_ratio/high_mean": 0.0012115016870666295, "clip_ratio/low_mean": 0.0010972554591717198, "clip_ratio/low_min": 8.112301929941168e-05, "clip_ratio/region_mean": 0.00230875717534218, "epoch": 0.0890397774005565, "grad_norm": 0.10918445140123367, "learning_rate": 1e-06, "loss": -0.0242, "step": 954 }, { "clip_ratio/high_max": 0.002498929898138158, "clip_ratio/high_mean": 0.0011500391774461605, "clip_ratio/low_mean": 0.001282649376662448, "clip_ratio/low_min": 3.1408855647896416e-05, "clip_ratio/region_mean": 0.002432688510452863, "epoch": 0.08913311050055708, "grad_norm": 0.12890565395355225, "learning_rate": 1e-06, "loss": -0.0057, "step": 955 }, { "clip_ratio/high_max": 0.002750506053416757, "clip_ratio/high_mean": 0.0011363541871105554, "clip_ratio/low_mean": 0.0013804125374008436, "clip_ratio/low_min": 5.9311465520295314e-05, "clip_ratio/region_mean": 0.0025167667408823036, "epoch": 0.08922644360055766, "grad_norm": 0.12218194454908371, "learning_rate": 1e-06, "loss": 0.051, "step": 956 }, { "clip_ratio/high_max": 0.0027811216059490107, "clip_ratio/high_mean": 0.0012290315025893506, "clip_ratio/low_mean": 0.001322785497904988, "clip_ratio/low_min": 0.0001337063604296418, "clip_ratio/region_mean": 0.0025518170004943386, "epoch": 0.08931977670055825, "grad_norm": 0.12276507169008255, "learning_rate": 1e-06, "loss": -0.0153, "step": 957 }, { "clip_ratio/high_max": 0.0029567002347903326, "clip_ratio/high_mean": 0.0011472656005935278, "clip_ratio/low_mean": 0.0013763130918960087, "clip_ratio/low_min": 0.00018796045060298638, "clip_ratio/region_mean": 0.0025235787106794305, "epoch": 0.08941310980055883, "grad_norm": 0.1302059441804886, "learning_rate": 1e-06, "loss": 0.0607, "step": 958 }, { "clip_ratio/high_max": 0.0024740926673985086, "clip_ratio/high_mean": 0.0010495662318135146, "clip_ratio/low_mean": 0.001175986317321076, "clip_ratio/low_min": 2.317389771633316e-05, "clip_ratio/region_mean": 0.002225552514573792, "epoch": 0.08950644290055941, "grad_norm": 0.10669755190610886, "learning_rate": 1e-06, "loss": 0.0302, "step": 959 }, { "clip_ratio/high_max": 0.00262820185162127, "clip_ratio/high_mean": 0.0011092682179878466, "clip_ratio/low_mean": 0.0014082114503253251, "clip_ratio/low_min": 1.8058364730677567e-05, "clip_ratio/region_mean": 0.0025174795810016803, "epoch": 0.08959977600056, "grad_norm": 0.10697541385889053, "learning_rate": 1e-06, "loss": 0.0138, "step": 960 }, { "clip_ratio/high_max": 0.0027912798468605615, "clip_ratio/high_mean": 0.0012065249211445916, "clip_ratio/low_mean": 0.001237166488863295, "clip_ratio/low_min": 6.789954750274774e-05, "clip_ratio/region_mean": 0.0024436914318357594, "epoch": 0.08969310910056058, "grad_norm": 0.11158915609121323, "learning_rate": 1e-06, "loss": 0.0033, "step": 961 }, { "clip_ratio/high_max": 0.0029689383736695163, "clip_ratio/high_mean": 0.0013222349189163651, "clip_ratio/low_mean": 0.0012593958817888051, "clip_ratio/low_min": 5.276702177070547e-05, "clip_ratio/region_mean": 0.0025816308334469795, "epoch": 0.08978644220056116, "grad_norm": 0.12170516699552536, "learning_rate": 1e-06, "loss": -0.0051, "step": 962 }, { "clip_ratio/high_max": 0.002842773770680651, "clip_ratio/high_mean": 0.001290344156586798, "clip_ratio/low_mean": 0.0012439305537554901, "clip_ratio/low_min": 8.368101862288313e-05, "clip_ratio/region_mean": 0.002534274732170161, "epoch": 0.08987977530056175, "grad_norm": 0.11743982881307602, "learning_rate": 1e-06, "loss": -0.0007, "step": 963 }, { "clip_ratio/high_max": 0.002831010155205149, "clip_ratio/high_mean": 0.0012584279393195175, "clip_ratio/low_mean": 0.0013923864098615013, "clip_ratio/low_min": 0.00012306672942941077, "clip_ratio/region_mean": 0.002650814363732934, "epoch": 0.08997310840056233, "grad_norm": 0.11645185202360153, "learning_rate": 1e-06, "loss": 0.0485, "step": 964 }, { "clip_ratio/high_max": 0.002493349886208307, "clip_ratio/high_mean": 0.0011607019587245304, "clip_ratio/low_mean": 0.0013702734213438816, "clip_ratio/low_min": 0.00011183765309397131, "clip_ratio/region_mean": 0.002530975347326603, "epoch": 0.09006644150056292, "grad_norm": 0.12098593264818192, "learning_rate": 1e-06, "loss": 0.0289, "step": 965 }, { "clip_ratio/high_max": 0.0025933179349522106, "clip_ratio/high_mean": 0.0011341691497364081, "clip_ratio/low_mean": 0.001301575106481323, "clip_ratio/low_min": 0.00018550738059275318, "clip_ratio/region_mean": 0.002435744318063371, "epoch": 0.0901597746005635, "grad_norm": 0.11580844223499298, "learning_rate": 1e-06, "loss": 0.0213, "step": 966 }, { "clip_ratio/high_max": 0.0025339243074995466, "clip_ratio/high_mean": 0.0010514588302612538, "clip_ratio/low_mean": 0.0013844799650541972, "clip_ratio/low_min": 0.00012641030025406508, "clip_ratio/region_mean": 0.002435938782582525, "epoch": 0.09025310770056408, "grad_norm": 0.11469963937997818, "learning_rate": 1e-06, "loss": 0.0287, "step": 967 }, { "clip_ratio/high_max": 0.0026930405510938726, "clip_ratio/high_mean": 0.001141781423939392, "clip_ratio/low_mean": 0.0014822542798356153, "clip_ratio/low_min": 0.00023987569147720933, "clip_ratio/region_mean": 0.002624035674671177, "epoch": 0.09034644080056467, "grad_norm": 0.12461795657873154, "learning_rate": 1e-06, "loss": 0.0139, "step": 968 }, { "clip_ratio/high_max": 0.0023683330746280262, "clip_ratio/high_mean": 0.0011057031997552258, "clip_ratio/low_mean": 0.0014790066234127153, "clip_ratio/low_min": 0.00011092543354607187, "clip_ratio/region_mean": 0.0025847098586382344, "epoch": 0.09043977390056525, "grad_norm": 0.16061313450336456, "learning_rate": 1e-06, "loss": 0.0515, "step": 969 }, { "clip_ratio/high_max": 0.0028513290744740516, "clip_ratio/high_mean": 0.001155625957835582, "clip_ratio/low_mean": 0.001233182741998462, "clip_ratio/low_min": 2.111361300194403e-05, "clip_ratio/region_mean": 0.0023888087598606944, "epoch": 0.09053310700056583, "grad_norm": 0.12160170823335648, "learning_rate": 1e-06, "loss": 0.0288, "step": 970 }, { "clip_ratio/high_max": 0.0026947276091959793, "clip_ratio/high_mean": 0.0012257949838385684, "clip_ratio/low_mean": 0.0014641191810369492, "clip_ratio/low_min": 0.00023094736206985544, "clip_ratio/region_mean": 0.0026899141812464222, "epoch": 0.09062644010056642, "grad_norm": 0.11060559749603271, "learning_rate": 1e-06, "loss": 0.0276, "step": 971 }, { "clip_ratio/high_max": 0.0031339486376964487, "clip_ratio/high_mean": 0.0012617829997907393, "clip_ratio/low_mean": 0.0012479627057473408, "clip_ratio/low_min": 0.0001483262240071781, "clip_ratio/region_mean": 0.0025097457400988787, "epoch": 0.090719773200567, "grad_norm": 0.12609367072582245, "learning_rate": 1e-06, "loss": -0.0391, "step": 972 }, { "clip_ratio/high_max": 0.0024247981345979497, "clip_ratio/high_mean": 0.0010785798913275357, "clip_ratio/low_mean": 0.001235927891684696, "clip_ratio/low_min": 1.2700670595222618e-05, "clip_ratio/region_mean": 0.0023145077866502106, "epoch": 0.09081310630056758, "grad_norm": 0.11527407914400101, "learning_rate": 1e-06, "loss": 0.0556, "step": 973 }, { "clip_ratio/high_max": 0.0026806148161995225, "clip_ratio/high_mean": 0.001233730454259785, "clip_ratio/low_mean": 0.001235786310644471, "clip_ratio/low_min": 0.00014530838598147966, "clip_ratio/region_mean": 0.002469516795827076, "epoch": 0.09090643940056817, "grad_norm": 0.12235342711210251, "learning_rate": 1e-06, "loss": 0.0086, "step": 974 }, { "clip_ratio/high_max": 0.0026093997003044933, "clip_ratio/high_mean": 0.0012172102760814596, "clip_ratio/low_mean": 0.0013219025240687188, "clip_ratio/low_min": 0.0001458983479096787, "clip_ratio/region_mean": 0.0025391128583578393, "epoch": 0.09099977250056875, "grad_norm": 0.1196315586566925, "learning_rate": 1e-06, "loss": 0.0337, "step": 975 }, { "clip_ratio/high_max": 0.002150374373741215, "clip_ratio/high_mean": 0.0010012361999542918, "clip_ratio/low_mean": 0.0015102154829946812, "clip_ratio/low_min": 0.00013764554660156136, "clip_ratio/region_mean": 0.0025114516756730154, "epoch": 0.09109310560056934, "grad_norm": 0.10887237638235092, "learning_rate": 1e-06, "loss": 0.0559, "step": 976 }, { "clip_ratio/high_max": 0.0022300366836134344, "clip_ratio/high_mean": 0.0009779280953807756, "clip_ratio/low_mean": 0.0013395770729403012, "clip_ratio/low_min": 0.00014393094170372933, "clip_ratio/region_mean": 0.0023175052338046953, "epoch": 0.09118643870056992, "grad_norm": 0.10701128095388412, "learning_rate": 1e-06, "loss": 0.0258, "step": 977 }, { "clip_ratio/high_max": 0.003646559787739534, "clip_ratio/high_mean": 0.0014837396593065932, "clip_ratio/low_mean": 0.0012861792092735413, "clip_ratio/low_min": 0.00010775718510558363, "clip_ratio/region_mean": 0.0027699188285623677, "epoch": 0.0912797718005705, "grad_norm": 0.11658689379692078, "learning_rate": 1e-06, "loss": 0.0034, "step": 978 }, { "clip_ratio/high_max": 0.0028070593762095086, "clip_ratio/high_mean": 0.0011891994799952954, "clip_ratio/low_mean": 0.0014283322416304145, "clip_ratio/low_min": 6.652608317381237e-05, "clip_ratio/region_mean": 0.0026175316888839006, "epoch": 0.09137310490057109, "grad_norm": 6.494184494018555, "learning_rate": 1e-06, "loss": 0.033, "step": 979 }, { "clip_ratio/high_max": 0.00276714709616499, "clip_ratio/high_mean": 0.0011871292226715013, "clip_ratio/low_mean": 0.0013687313330592588, "clip_ratio/low_min": 0.0002524561386962887, "clip_ratio/region_mean": 0.0025558605848345906, "epoch": 0.09146643800057166, "grad_norm": 0.11277235299348831, "learning_rate": 1e-06, "loss": 0.0335, "step": 980 }, { "clip_ratio/high_max": 0.002775694170850329, "clip_ratio/high_mean": 0.0012744389459840022, "clip_ratio/low_mean": 0.0012568929014378227, "clip_ratio/low_min": 3.302860750409309e-05, "clip_ratio/region_mean": 0.0025313318910775706, "epoch": 0.09155977110057224, "grad_norm": 0.10077925026416779, "learning_rate": 1e-06, "loss": 0.0004, "step": 981 }, { "clip_ratio/high_max": 0.0030001112245372497, "clip_ratio/high_mean": 0.0012147217676101718, "clip_ratio/low_mean": 0.0017618795081943972, "clip_ratio/low_min": 0.00010447325803397689, "clip_ratio/region_mean": 0.002976601383124944, "epoch": 0.09165310420057284, "grad_norm": 0.16938641667366028, "learning_rate": 1e-06, "loss": 0.0616, "step": 982 }, { "clip_ratio/high_max": 0.0026180426066275686, "clip_ratio/high_mean": 0.0011985322344116867, "clip_ratio/low_mean": 0.0014680899221275467, "clip_ratio/low_min": 6.433726957766339e-05, "clip_ratio/region_mean": 0.002666622218384873, "epoch": 0.09174643730057341, "grad_norm": 0.11450305581092834, "learning_rate": 1e-06, "loss": 0.0217, "step": 983 }, { "clip_ratio/high_max": 0.0023163015284808353, "clip_ratio/high_mean": 0.001109094126150012, "clip_ratio/low_mean": 0.0014027495199115947, "clip_ratio/low_min": 0.00015151396837609354, "clip_ratio/region_mean": 0.0025118436315096915, "epoch": 0.09183977040057399, "grad_norm": 0.12174700200557709, "learning_rate": 1e-06, "loss": 0.0199, "step": 984 }, { "clip_ratio/high_max": 0.002674655268492643, "clip_ratio/high_mean": 0.0010775317787192762, "clip_ratio/low_mean": 0.0015676867806178052, "clip_ratio/low_min": 0.00015655748848075746, "clip_ratio/region_mean": 0.0026452185556991026, "epoch": 0.09193310350057458, "grad_norm": 0.1115269809961319, "learning_rate": 1e-06, "loss": 0.0485, "step": 985 }, { "clip_ratio/high_max": 0.002467248596076388, "clip_ratio/high_mean": 0.001117363128287252, "clip_ratio/low_mean": 0.0011798180385085288, "clip_ratio/low_min": 5.335040623322129e-05, "clip_ratio/region_mean": 0.0022971812068135478, "epoch": 0.09202643660057516, "grad_norm": 0.10868289321660995, "learning_rate": 1e-06, "loss": 0.0017, "step": 986 }, { "clip_ratio/high_max": 0.002487644786015153, "clip_ratio/high_mean": 0.001049490732839331, "clip_ratio/low_mean": 0.0015151495681493543, "clip_ratio/low_min": 5.959920508757932e-05, "clip_ratio/region_mean": 0.0025646403009886853, "epoch": 0.09211976970057575, "grad_norm": 0.11784178763628006, "learning_rate": 1e-06, "loss": 0.0459, "step": 987 }, { "clip_ratio/high_max": 0.0027295782456349116, "clip_ratio/high_mean": 0.0011878538298333297, "clip_ratio/low_mean": 0.0014676620448881295, "clip_ratio/low_min": 5.278142816678155e-05, "clip_ratio/region_mean": 0.002655515883816406, "epoch": 0.09221310280057633, "grad_norm": 0.1076555848121643, "learning_rate": 1e-06, "loss": 0.0424, "step": 988 }, { "clip_ratio/high_max": 0.002901508785726037, "clip_ratio/high_mean": 0.0013105931066093035, "clip_ratio/low_mean": 0.0015459497699339408, "clip_ratio/low_min": 7.488697792723542e-05, "clip_ratio/region_mean": 0.0028565428874571808, "epoch": 0.09230643590057691, "grad_norm": 0.1147349551320076, "learning_rate": 1e-06, "loss": 0.0205, "step": 989 }, { "clip_ratio/high_max": 0.0025767290571820922, "clip_ratio/high_mean": 0.0010862722829187987, "clip_ratio/low_mean": 0.0013963995079393499, "clip_ratio/low_min": 0.0001059698329299863, "clip_ratio/region_mean": 0.00248267175629735, "epoch": 0.0923997690005775, "grad_norm": 0.11017246544361115, "learning_rate": 1e-06, "loss": 0.0361, "step": 990 }, { "clip_ratio/high_max": 0.0026134031286346726, "clip_ratio/high_mean": 0.00136757614382077, "clip_ratio/low_mean": 0.001320318890066119, "clip_ratio/low_min": 0.0001321766339970054, "clip_ratio/region_mean": 0.0026878950448008254, "epoch": 0.09249310210057808, "grad_norm": 0.12110080569982529, "learning_rate": 1e-06, "loss": 0.0236, "step": 991 }, { "clip_ratio/high_max": 0.0029008361852902453, "clip_ratio/high_mean": 0.0012395746580295963, "clip_ratio/low_mean": 0.001359284982754616, "clip_ratio/low_min": 0.00010582365393929649, "clip_ratio/region_mean": 0.002598859660793096, "epoch": 0.09258643520057866, "grad_norm": 0.12579837441444397, "learning_rate": 1e-06, "loss": 0.0363, "step": 992 }, { "clip_ratio/high_max": 0.003535606461809948, "clip_ratio/high_mean": 0.0014662159810541198, "clip_ratio/low_mean": 0.0014731299452250823, "clip_ratio/low_min": 0.00014710106916027144, "clip_ratio/region_mean": 0.002939345868071541, "epoch": 0.09267976830057925, "grad_norm": 0.14476199448108673, "learning_rate": 1e-06, "loss": -0.0148, "step": 993 }, { "clip_ratio/high_max": 0.002774726308416575, "clip_ratio/high_mean": 0.0012344773749646265, "clip_ratio/low_mean": 0.001647737153689377, "clip_ratio/low_min": 0.00011739384763131966, "clip_ratio/region_mean": 0.0028822145686717704, "epoch": 0.09277310140057983, "grad_norm": 0.11765889823436737, "learning_rate": 1e-06, "loss": 0.0779, "step": 994 }, { "clip_ratio/high_max": 0.003133786449325271, "clip_ratio/high_mean": 0.001201414084789576, "clip_ratio/low_mean": 0.0014623204842791893, "clip_ratio/low_min": 7.799505237926496e-05, "clip_ratio/region_mean": 0.002663734558154829, "epoch": 0.09286643450058042, "grad_norm": 0.12022411078214645, "learning_rate": 1e-06, "loss": 0.0138, "step": 995 }, { "clip_ratio/high_max": 0.0030907098116585985, "clip_ratio/high_mean": 0.0012947207214892842, "clip_ratio/low_mean": 0.0014917460557626327, "clip_ratio/low_min": 0.00011814427671197336, "clip_ratio/region_mean": 0.0027864666990353726, "epoch": 0.092959767600581, "grad_norm": 0.1165364682674408, "learning_rate": 1e-06, "loss": -0.0012, "step": 996 }, { "clip_ratio/high_max": 0.003275159055192489, "clip_ratio/high_mean": 0.0012671573895204347, "clip_ratio/low_mean": 0.0017015972880471963, "clip_ratio/low_min": 0.0001549023982079234, "clip_ratio/region_mean": 0.002968754735775292, "epoch": 0.09305310070058158, "grad_norm": 0.10712571442127228, "learning_rate": 1e-06, "loss": 0.0455, "step": 997 }, { "clip_ratio/high_max": 0.0024871829300536774, "clip_ratio/high_mean": 0.001137826408012188, "clip_ratio/low_mean": 0.0016198411758523434, "clip_ratio/low_min": 0.0001743524244375294, "clip_ratio/region_mean": 0.002757667549303733, "epoch": 0.09314643380058217, "grad_norm": 0.11063006520271301, "learning_rate": 1e-06, "loss": 0.0325, "step": 998 }, { "clip_ratio/high_max": 0.0027525357509148307, "clip_ratio/high_mean": 0.0011536253332451452, "clip_ratio/low_mean": 0.0017016248202708084, "clip_ratio/low_min": 0.0002547297553974204, "clip_ratio/region_mean": 0.00285525007348042, "epoch": 0.09323976690058275, "grad_norm": 0.1114213764667511, "learning_rate": 1e-06, "loss": 0.0564, "step": 999 }, { "clip_ratio/high_max": 0.002417416326352395, "clip_ratio/high_mean": 0.001042430028974195, "clip_ratio/low_mean": 0.0017356326097797137, "clip_ratio/low_min": 0.00018953188828163547, "clip_ratio/region_mean": 0.002778062662400771, "epoch": 0.09333310000058333, "grad_norm": 0.14488564431667328, "learning_rate": 1e-06, "loss": 0.0964, "step": 1000 }, { "clip_ratio/high_max": 0.0028707007149932906, "clip_ratio/high_mean": 0.001260689619812183, "clip_ratio/low_mean": 0.0014072748344915453, "clip_ratio/low_min": 8.791204891167581e-05, "clip_ratio/region_mean": 0.0026679644361138344, "epoch": 0.09342643310058392, "grad_norm": 0.12729983031749725, "learning_rate": 1e-06, "loss": -0.0002, "step": 1001 }, { "clip_ratio/high_max": 0.0029966982765472494, "clip_ratio/high_mean": 0.001215299365867395, "clip_ratio/low_mean": 0.001570864817040274, "clip_ratio/low_min": 0.00011974100380029995, "clip_ratio/region_mean": 0.0027861642302013934, "epoch": 0.0935197662005845, "grad_norm": 0.11563847959041595, "learning_rate": 1e-06, "loss": 0.0143, "step": 1002 }, { "clip_ratio/high_max": 0.002537423377361847, "clip_ratio/high_mean": 0.0010740548132162075, "clip_ratio/low_mean": 0.0016666319497744553, "clip_ratio/low_min": 0.00011701982657541521, "clip_ratio/region_mean": 0.0027406867520767264, "epoch": 0.09361309930058508, "grad_norm": 0.11845163255929947, "learning_rate": 1e-06, "loss": 0.0849, "step": 1003 }, { "clip_ratio/high_max": 0.002433712601487059, "clip_ratio/high_mean": 0.0010959279261442134, "clip_ratio/low_mean": 0.0014119522929831874, "clip_ratio/low_min": 6.466150807682425e-05, "clip_ratio/region_mean": 0.0025078802354983054, "epoch": 0.09370643240058567, "grad_norm": 0.14227983355522156, "learning_rate": 1e-06, "loss": 0.0768, "step": 1004 }, { "clip_ratio/high_max": 0.003072989289648831, "clip_ratio/high_mean": 0.0014026462522451766, "clip_ratio/low_mean": 0.0017797433320083655, "clip_ratio/low_min": 0.00019703758243849734, "clip_ratio/region_mean": 0.0031823895551497117, "epoch": 0.09379976550058625, "grad_norm": 0.1346999555826187, "learning_rate": 1e-06, "loss": 0.011, "step": 1005 }, { "clip_ratio/high_max": 0.0030411221814574674, "clip_ratio/high_mean": 0.001289644238568144, "clip_ratio/low_mean": 0.001281272227060981, "clip_ratio/low_min": 0.00013580263657786418, "clip_ratio/region_mean": 0.002570916447439231, "epoch": 0.09389309860058684, "grad_norm": 0.10416793078184128, "learning_rate": 1e-06, "loss": 0.0025, "step": 1006 }, { "clip_ratio/high_max": 0.002908468020905275, "clip_ratio/high_mean": 0.0012044460036122473, "clip_ratio/low_mean": 0.0013638799682667013, "clip_ratio/low_min": 0.00014062000991543755, "clip_ratio/region_mean": 0.0025683260246296413, "epoch": 0.09398643170058742, "grad_norm": 0.10859540849924088, "learning_rate": 1e-06, "loss": 0.046, "step": 1007 }, { "clip_ratio/high_max": 0.0029799915937473997, "clip_ratio/high_mean": 0.0012231259788677562, "clip_ratio/low_mean": 0.0013812620854878332, "clip_ratio/low_min": 1.975035593204666e-05, "clip_ratio/region_mean": 0.0026043879915960133, "epoch": 0.094079764800588, "grad_norm": 0.11374291032552719, "learning_rate": 1e-06, "loss": 0.0328, "step": 1008 }, { "clip_ratio/high_max": 0.002981277779326774, "clip_ratio/high_mean": 0.0012567323392431717, "clip_ratio/low_mean": 0.0013621409343613777, "clip_ratio/low_min": 9.466828487347811e-05, "clip_ratio/region_mean": 0.002618873208120931, "epoch": 0.09417309790058859, "grad_norm": 0.11963991075754166, "learning_rate": 1e-06, "loss": 0.037, "step": 1009 }, { "clip_ratio/high_max": 0.002924138847447466, "clip_ratio/high_mean": 0.0011684025303111412, "clip_ratio/low_mean": 0.0012690034182014642, "clip_ratio/low_min": 9.931239037541673e-05, "clip_ratio/region_mean": 0.002437405928503722, "epoch": 0.09426643100058917, "grad_norm": 0.1102556437253952, "learning_rate": 1e-06, "loss": 0.0162, "step": 1010 }, { "clip_ratio/high_max": 0.003490545117529109, "clip_ratio/high_mean": 0.001336478475423064, "clip_ratio/low_mean": 0.0014367848270921968, "clip_ratio/low_min": 3.6649937101174146e-05, "clip_ratio/region_mean": 0.002773263258859515, "epoch": 0.09435976410058974, "grad_norm": 0.10565842688083649, "learning_rate": 1e-06, "loss": 0.0118, "step": 1011 }, { "clip_ratio/high_max": 0.0024250521819340065, "clip_ratio/high_mean": 0.0011224260142626008, "clip_ratio/low_mean": 0.0015114995840121992, "clip_ratio/low_min": 0.0002255729923490435, "clip_ratio/region_mean": 0.002633925607369747, "epoch": 0.09445309720059034, "grad_norm": 0.11838560551404953, "learning_rate": 1e-06, "loss": 0.0583, "step": 1012 }, { "clip_ratio/high_max": 0.0032207021431531757, "clip_ratio/high_mean": 0.0014960345069994219, "clip_ratio/low_mean": 0.0013963352430437226, "clip_ratio/low_min": 0.00011786611321440432, "clip_ratio/region_mean": 0.002892369797336869, "epoch": 0.09454643030059091, "grad_norm": 0.1224701777100563, "learning_rate": 1e-06, "loss": -0.0047, "step": 1013 }, { "clip_ratio/high_max": 0.003352995991008356, "clip_ratio/high_mean": 0.001287487013541977, "clip_ratio/low_mean": 0.0016660987821524031, "clip_ratio/low_min": 8.061644257395528e-05, "clip_ratio/region_mean": 0.0029535857611335814, "epoch": 0.09463976340059149, "grad_norm": 0.35519301891326904, "learning_rate": 1e-06, "loss": 0.0438, "step": 1014 }, { "clip_ratio/high_max": 0.0029679783692699857, "clip_ratio/high_mean": 0.0012489094842749182, "clip_ratio/low_mean": 0.0015583560780214611, "clip_ratio/low_min": 9.543924352328759e-05, "clip_ratio/region_mean": 0.0028072655622963794, "epoch": 0.09473309650059208, "grad_norm": 0.11893009394407272, "learning_rate": 1e-06, "loss": 0.0285, "step": 1015 }, { "clip_ratio/high_max": 0.002471135536325164, "clip_ratio/high_mean": 0.0011829351933556609, "clip_ratio/low_mean": 0.001563579364301404, "clip_ratio/low_min": 0.00017570037471159594, "clip_ratio/region_mean": 0.002746514554019086, "epoch": 0.09482642960059266, "grad_norm": 0.11624384671449661, "learning_rate": 1e-06, "loss": 0.0412, "step": 1016 }, { "clip_ratio/high_max": 0.0030758403736399487, "clip_ratio/high_mean": 0.0012291601997276302, "clip_ratio/low_mean": 0.0015185346273938194, "clip_ratio/low_min": 0.00016362998030672316, "clip_ratio/region_mean": 0.002747694874415174, "epoch": 0.09491976270059325, "grad_norm": 0.11920436471700668, "learning_rate": 1e-06, "loss": 0.0543, "step": 1017 }, { "clip_ratio/high_max": 0.0025371479569002986, "clip_ratio/high_mean": 0.001281979424675228, "clip_ratio/low_mean": 0.0014869609440211207, "clip_ratio/low_min": 0.00013216199113230687, "clip_ratio/region_mean": 0.0027689404087141156, "epoch": 0.09501309580059383, "grad_norm": 0.11914224177598953, "learning_rate": 1e-06, "loss": 0.0087, "step": 1018 }, { "clip_ratio/high_max": 0.0031767375403433107, "clip_ratio/high_mean": 0.0014190832698659506, "clip_ratio/low_mean": 0.001348505960777402, "clip_ratio/low_min": 7.851443388062762e-05, "clip_ratio/region_mean": 0.0027675891542457975, "epoch": 0.09510642890059441, "grad_norm": 0.10922086983919144, "learning_rate": 1e-06, "loss": -0.0615, "step": 1019 }, { "clip_ratio/high_max": 0.0029020875299465843, "clip_ratio/high_mean": 0.0014033407896931749, "clip_ratio/low_mean": 0.0012925723640364595, "clip_ratio/low_min": 0.00011625457773334347, "clip_ratio/region_mean": 0.0026959132592310198, "epoch": 0.095199762000595, "grad_norm": 0.11653022468090057, "learning_rate": 1e-06, "loss": -0.0014, "step": 1020 }, { "clip_ratio/high_max": 0.0028071497690689284, "clip_ratio/high_mean": 0.0012579893627844285, "clip_ratio/low_mean": 0.0013232964010967407, "clip_ratio/low_min": 0.0001721415319480002, "clip_ratio/region_mean": 0.0025812857638811693, "epoch": 0.09529309510059558, "grad_norm": 0.11123798787593842, "learning_rate": 1e-06, "loss": 0.0155, "step": 1021 }, { "clip_ratio/high_max": 0.0029149906258680858, "clip_ratio/high_mean": 0.0011823010245279875, "clip_ratio/low_mean": 0.0017846755290520377, "clip_ratio/low_min": 0.00018575931153463898, "clip_ratio/region_mean": 0.0029669764699065126, "epoch": 0.09538642820059616, "grad_norm": 0.11519214510917664, "learning_rate": 1e-06, "loss": 0.0355, "step": 1022 }, { "clip_ratio/high_max": 0.0028542855216073804, "clip_ratio/high_mean": 0.0011931294939131476, "clip_ratio/low_mean": 0.0014672696561319754, "clip_ratio/low_min": 0.00017124263922596583, "clip_ratio/region_mean": 0.0026603991937008686, "epoch": 0.09547976130059675, "grad_norm": 0.12846031785011292, "learning_rate": 1e-06, "loss": 0.0436, "step": 1023 }, { "clip_ratio/high_max": 0.002703148595173843, "clip_ratio/high_mean": 0.0012149533249612432, "clip_ratio/low_mean": 0.001366138178127585, "clip_ratio/low_min": 0.00016187837354664225, "clip_ratio/region_mean": 0.0025810914958128706, "epoch": 0.09557309440059733, "grad_norm": 0.10476890206336975, "learning_rate": 1e-06, "loss": 0.0124, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010105678013392905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 607.3308715820312, "completions/mean_terminated_length": 571.7156372070312, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.09566642750059791, "grad_norm": 0.11734706908464432, "learning_rate": 1e-06, "loss": 0.0358, "num_tokens": 728577627.0, "reward": 0.6058088541030884, "reward_std": 0.18527096509933472, "rewards/simpleverify_reward/mean": 0.6058087944984436, "rewards/simpleverify_reward/std": 0.488678514957428, "step": 1025 }, { "clip_ratio/high_max": 0.002203545438533183, "clip_ratio/high_mean": 0.0008861995393090183, "clip_ratio/low_mean": 0.0004953558945999248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001381555455736816, "epoch": 0.0957597606005985, "grad_norm": 0.11628786474466324, "learning_rate": 1e-06, "loss": 0.0061, "step": 1026 }, { "clip_ratio/high_max": 0.0019790424448729027, "clip_ratio/high_mean": 0.0008057031436692341, "clip_ratio/low_mean": 0.000578062220938591, "clip_ratio/low_min": 2.3079763195710257e-05, "clip_ratio/region_mean": 0.0013837653641530778, "epoch": 0.09585309370059908, "grad_norm": 0.11739090830087662, "learning_rate": 1e-06, "loss": 0.0209, "step": 1027 }, { "clip_ratio/high_max": 0.002154977591999341, "clip_ratio/high_mean": 0.000874872150234296, "clip_ratio/low_mean": 0.0006546993681695312, "clip_ratio/low_min": 2.7606633011600934e-05, "clip_ratio/region_mean": 0.0015295714983949438, "epoch": 0.09594642680059967, "grad_norm": 0.11764016002416611, "learning_rate": 1e-06, "loss": 0.0304, "step": 1028 }, { "clip_ratio/high_max": 0.0021571518445853144, "clip_ratio/high_mean": 0.0009240433573722839, "clip_ratio/low_mean": 0.0005817247983941343, "clip_ratio/low_min": 4.2850915633607656e-05, "clip_ratio/region_mean": 0.0015057681739563122, "epoch": 0.09603975990060025, "grad_norm": 0.12174678593873978, "learning_rate": 1e-06, "loss": -0.007, "step": 1029 }, { "clip_ratio/high_max": 0.0024027827821555547, "clip_ratio/high_mean": 0.000912268107640557, "clip_ratio/low_mean": 0.0007820751652616309, "clip_ratio/low_min": 9.473861882725032e-05, "clip_ratio/region_mean": 0.0016943432347034104, "epoch": 0.09613309300060083, "grad_norm": 0.11720043420791626, "learning_rate": 1e-06, "loss": 0.0351, "step": 1030 }, { "clip_ratio/high_max": 0.00186784199468093, "clip_ratio/high_mean": 0.0008012004655029159, "clip_ratio/low_mean": 0.0007481279699277366, "clip_ratio/low_min": 3.0222437999327667e-05, "clip_ratio/region_mean": 0.0015493284117837902, "epoch": 0.09622642610060142, "grad_norm": 0.18158195912837982, "learning_rate": 1e-06, "loss": 0.0453, "step": 1031 }, { "clip_ratio/high_max": 0.002229470032034442, "clip_ratio/high_mean": 0.0008827615674817935, "clip_ratio/low_mean": 0.0009120985141635174, "clip_ratio/low_min": 8.200300544558559e-05, "clip_ratio/region_mean": 0.0017948600579984486, "epoch": 0.096319759200602, "grad_norm": 0.11961593478918076, "learning_rate": 1e-06, "loss": 0.0428, "step": 1032 }, { "clip_ratio/high_max": 0.001949259705725126, "clip_ratio/high_mean": 0.0009293647708545905, "clip_ratio/low_mean": 0.0006682530784019036, "clip_ratio/low_min": 8.57691793498816e-06, "clip_ratio/region_mean": 0.0015976178765413351, "epoch": 0.09641309230060258, "grad_norm": 0.11553668975830078, "learning_rate": 1e-06, "loss": -0.0225, "step": 1033 }, { "clip_ratio/high_max": 0.0020059601301909424, "clip_ratio/high_mean": 0.000866851016326109, "clip_ratio/low_mean": 0.0007999814642971614, "clip_ratio/low_min": 6.335255329759093e-05, "clip_ratio/region_mean": 0.0016668324824422598, "epoch": 0.09650642540060317, "grad_norm": 0.11509881168603897, "learning_rate": 1e-06, "loss": 0.0288, "step": 1034 }, { "clip_ratio/high_max": 0.002290750271640718, "clip_ratio/high_mean": 0.0010511786113056587, "clip_ratio/low_mean": 0.0007876292620494496, "clip_ratio/low_min": 4.162153345532715e-05, "clip_ratio/region_mean": 0.0018388078351563308, "epoch": 0.09659975850060375, "grad_norm": 0.11182078719139099, "learning_rate": 1e-06, "loss": -0.0183, "step": 1035 }, { "clip_ratio/high_max": 0.0023164551821537316, "clip_ratio/high_mean": 0.0009914912152453326, "clip_ratio/low_mean": 0.000819183254861855, "clip_ratio/low_min": 3.7730063013441395e-05, "clip_ratio/region_mean": 0.0018106744610122405, "epoch": 0.09669309160060433, "grad_norm": 0.12165460735559464, "learning_rate": 1e-06, "loss": 0.0096, "step": 1036 }, { "clip_ratio/high_max": 0.0024439232729491778, "clip_ratio/high_mean": 0.0009857240274868673, "clip_ratio/low_mean": 0.0007959898302942747, "clip_ratio/low_min": 3.829734396276763e-05, "clip_ratio/region_mean": 0.0017817138141253963, "epoch": 0.09678642470060492, "grad_norm": 0.11259244382381439, "learning_rate": 1e-06, "loss": -0.0017, "step": 1037 }, { "clip_ratio/high_max": 0.0019664743886096403, "clip_ratio/high_mean": 0.0007329306408792036, "clip_ratio/low_mean": 0.0008232615400629584, "clip_ratio/low_min": 5.787256031908328e-05, "clip_ratio/region_mean": 0.0015561921827611513, "epoch": 0.0968797578006055, "grad_norm": 0.1141917034983635, "learning_rate": 1e-06, "loss": 0.0429, "step": 1038 }, { "clip_ratio/high_max": 0.0023949110400280915, "clip_ratio/high_mean": 0.0009441434631298762, "clip_ratio/low_mean": 0.0008892881432984723, "clip_ratio/low_min": 6.639495586568955e-05, "clip_ratio/region_mean": 0.0018334316191612743, "epoch": 0.09697309090060609, "grad_norm": 0.10631438344717026, "learning_rate": 1e-06, "loss": -0.0005, "step": 1039 }, { "clip_ratio/high_max": 0.0028446987307688687, "clip_ratio/high_mean": 0.0011105857956863474, "clip_ratio/low_mean": 0.001017337981465971, "clip_ratio/low_min": 0.00010017254317062907, "clip_ratio/region_mean": 0.0021279238135321066, "epoch": 0.09706642400060667, "grad_norm": 0.11398673057556152, "learning_rate": 1e-06, "loss": -0.0016, "step": 1040 }, { "clip_ratio/high_max": 0.0024882530415197834, "clip_ratio/high_mean": 0.0009914970560203074, "clip_ratio/low_mean": 0.0008791560558165656, "clip_ratio/low_min": 4.821873881155625e-05, "clip_ratio/region_mean": 0.0018706530900090002, "epoch": 0.09715975710060724, "grad_norm": 0.10839464515447617, "learning_rate": 1e-06, "loss": 0.0197, "step": 1041 }, { "clip_ratio/high_max": 0.002378000812313985, "clip_ratio/high_mean": 0.0010027813077613246, "clip_ratio/low_mean": 0.0009191029876092216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001921884271723684, "epoch": 0.09725309020060784, "grad_norm": 0.11780335754156113, "learning_rate": 1e-06, "loss": -0.013, "step": 1042 }, { "clip_ratio/high_max": 0.002430081120110117, "clip_ratio/high_mean": 0.0009942003889591433, "clip_ratio/low_mean": 0.0009172251120617148, "clip_ratio/low_min": 6.252175626286771e-05, "clip_ratio/region_mean": 0.0019114254755550064, "epoch": 0.09734642330060841, "grad_norm": 0.11445480585098267, "learning_rate": 1e-06, "loss": 0.0146, "step": 1043 }, { "clip_ratio/high_max": 0.001884766861621756, "clip_ratio/high_mean": 0.0008490565087413415, "clip_ratio/low_mean": 0.0010101776260853512, "clip_ratio/low_min": 0.0001224055959028192, "clip_ratio/region_mean": 0.001859234158473555, "epoch": 0.09743975640060899, "grad_norm": 0.12587323784828186, "learning_rate": 1e-06, "loss": 0.0129, "step": 1044 }, { "clip_ratio/high_max": 0.002351960061787395, "clip_ratio/high_mean": 0.0009211548076564213, "clip_ratio/low_mean": 0.0011607014275796246, "clip_ratio/low_min": 0.000128675425912661, "clip_ratio/region_mean": 0.0020818562334170565, "epoch": 0.09753308950060958, "grad_norm": 0.11390280723571777, "learning_rate": 1e-06, "loss": 0.0178, "step": 1045 }, { "clip_ratio/high_max": 0.0020965763214917388, "clip_ratio/high_mean": 0.0008331206827278947, "clip_ratio/low_mean": 0.0010927095499937423, "clip_ratio/low_min": 4.396080930746393e-05, "clip_ratio/region_mean": 0.0019258302127127536, "epoch": 0.09762642260061016, "grad_norm": 0.12111048400402069, "learning_rate": 1e-06, "loss": 0.0533, "step": 1046 }, { "clip_ratio/high_max": 0.0024366968573303893, "clip_ratio/high_mean": 0.001031734680509544, "clip_ratio/low_mean": 0.0011139175039716065, "clip_ratio/low_min": 4.6712941184523515e-05, "clip_ratio/region_mean": 0.0021456522517837584, "epoch": 0.09771975570061076, "grad_norm": 0.12409225106239319, "learning_rate": 1e-06, "loss": -0.0302, "step": 1047 }, { "clip_ratio/high_max": 0.002305422880453989, "clip_ratio/high_mean": 0.0009752197820489528, "clip_ratio/low_mean": 0.0011876127846335294, "clip_ratio/low_min": 0.0001382908067171229, "clip_ratio/region_mean": 0.0021628325121128, "epoch": 0.09781308880061133, "grad_norm": 0.12369311600923538, "learning_rate": 1e-06, "loss": 0.0322, "step": 1048 }, { "clip_ratio/high_max": 0.0024026472310652025, "clip_ratio/high_mean": 0.0009585549669282045, "clip_ratio/low_mean": 0.0011513419631228317, "clip_ratio/low_min": 5.141630845173495e-05, "clip_ratio/region_mean": 0.0021098969082231633, "epoch": 0.09790642190061191, "grad_norm": 0.1379605531692505, "learning_rate": 1e-06, "loss": 0.0039, "step": 1049 }, { "clip_ratio/high_max": 0.0024780290223134216, "clip_ratio/high_mean": 0.0009574517334840493, "clip_ratio/low_mean": 0.0009234890603693202, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018809408429660834, "epoch": 0.0979997550006125, "grad_norm": 0.10189446061849594, "learning_rate": 1e-06, "loss": -0.0099, "step": 1050 }, { "clip_ratio/high_max": 0.0024892426663427614, "clip_ratio/high_mean": 0.0010142828468815424, "clip_ratio/low_mean": 0.0011273604941379745, "clip_ratio/low_min": 9.914198017213494e-05, "clip_ratio/region_mean": 0.002141643373761326, "epoch": 0.09809308810061308, "grad_norm": 0.1702851802110672, "learning_rate": 1e-06, "loss": 0.0582, "step": 1051 }, { "clip_ratio/high_max": 0.0021126051506143995, "clip_ratio/high_mean": 0.0009414844571438152, "clip_ratio/low_mean": 0.001179920824142755, "clip_ratio/low_min": 5.6766763009363785e-05, "clip_ratio/region_mean": 0.0021214052831055596, "epoch": 0.09818642120061366, "grad_norm": 0.13180114328861237, "learning_rate": 1e-06, "loss": 0.0283, "step": 1052 }, { "clip_ratio/high_max": 0.0025132970768027008, "clip_ratio/high_mean": 0.0011431561506469734, "clip_ratio/low_mean": 0.0012076263828930678, "clip_ratio/low_min": 5.9605768910842016e-05, "clip_ratio/region_mean": 0.0023507824807893485, "epoch": 0.09827975430061425, "grad_norm": 0.13643735647201538, "learning_rate": 1e-06, "loss": 0.0461, "step": 1053 }, { "clip_ratio/high_max": 0.002515937536372803, "clip_ratio/high_mean": 0.0011588189663598314, "clip_ratio/low_mean": 0.001067479237462976, "clip_ratio/low_min": 8.296840314869769e-05, "clip_ratio/region_mean": 0.002226298143796157, "epoch": 0.09837308740061483, "grad_norm": 0.11702553182840347, "learning_rate": 1e-06, "loss": -0.0292, "step": 1054 }, { "clip_ratio/high_max": 0.0023767808961565606, "clip_ratio/high_mean": 0.0010425375839986373, "clip_ratio/low_mean": 0.0010662447875802172, "clip_ratio/low_min": 7.189812276919838e-05, "clip_ratio/region_mean": 0.002108782373397844, "epoch": 0.09846642050061541, "grad_norm": 0.11354535818099976, "learning_rate": 1e-06, "loss": -0.0198, "step": 1055 }, { "clip_ratio/high_max": 0.002434593850921374, "clip_ratio/high_mean": 0.0009861520084086806, "clip_ratio/low_mean": 0.001331477120402269, "clip_ratio/low_min": 0.000134748039272381, "clip_ratio/region_mean": 0.0023176291069830768, "epoch": 0.098559753600616, "grad_norm": 0.11405406892299652, "learning_rate": 1e-06, "loss": 0.0327, "step": 1056 }, { "clip_ratio/high_max": 0.0022987370612099767, "clip_ratio/high_mean": 0.0009943765180651098, "clip_ratio/low_mean": 0.0013056318202870898, "clip_ratio/low_min": 0.00010054455833596876, "clip_ratio/region_mean": 0.0023000083238002844, "epoch": 0.09865308670061658, "grad_norm": 0.11773454397916794, "learning_rate": 1e-06, "loss": 0.0211, "step": 1057 }, { "clip_ratio/high_max": 0.0024161640540114604, "clip_ratio/high_mean": 0.0009474781018070644, "clip_ratio/low_mean": 0.001187206349641201, "clip_ratio/low_min": 5.713464997825213e-05, "clip_ratio/region_mean": 0.002134684422344435, "epoch": 0.09874641980061717, "grad_norm": 0.10381419956684113, "learning_rate": 1e-06, "loss": 0.0322, "step": 1058 }, { "clip_ratio/high_max": 0.002561836015956942, "clip_ratio/high_mean": 0.001045305689331144, "clip_ratio/low_mean": 0.0010182710029766895, "clip_ratio/low_min": 6.0208321883692406e-05, "clip_ratio/region_mean": 0.002063576685031876, "epoch": 0.09883975290061775, "grad_norm": 0.13515760004520416, "learning_rate": 1e-06, "loss": 0.0179, "step": 1059 }, { "clip_ratio/high_max": 0.002278441490489058, "clip_ratio/high_mean": 0.0010177104068134213, "clip_ratio/low_mean": 0.001230415622558212, "clip_ratio/low_min": 0.00015775989413668867, "clip_ratio/region_mean": 0.0022481260166387074, "epoch": 0.09893308600061833, "grad_norm": 0.1366504579782486, "learning_rate": 1e-06, "loss": 0.0433, "step": 1060 }, { "clip_ratio/high_max": 0.0024114710176945664, "clip_ratio/high_mean": 0.0009586821852280991, "clip_ratio/low_mean": 0.0013040469275438227, "clip_ratio/low_min": 6.107902754592942e-05, "clip_ratio/region_mean": 0.002262729154608678, "epoch": 0.09902641910061892, "grad_norm": 0.09972595423460007, "learning_rate": 1e-06, "loss": 0.0395, "step": 1061 }, { "clip_ratio/high_max": 0.0021520118170883507, "clip_ratio/high_mean": 0.0009067023474926827, "clip_ratio/low_mean": 0.0011340478595229797, "clip_ratio/low_min": 5.071438044979004e-05, "clip_ratio/region_mean": 0.002040750208834652, "epoch": 0.0991197522006195, "grad_norm": 0.10718478262424469, "learning_rate": 1e-06, "loss": 0.0345, "step": 1062 }, { "clip_ratio/high_max": 0.002620814244437497, "clip_ratio/high_mean": 0.0012037802516715601, "clip_ratio/low_mean": 0.0010417981029604562, "clip_ratio/low_min": 4.45548339484958e-05, "clip_ratio/region_mean": 0.0022455783109762706, "epoch": 0.09921308530062008, "grad_norm": 0.11672727018594742, "learning_rate": 1e-06, "loss": -0.0433, "step": 1063 }, { "clip_ratio/high_max": 0.0027213723442400806, "clip_ratio/high_mean": 0.001120669265219476, "clip_ratio/low_mean": 0.0011299401230644435, "clip_ratio/low_min": 0.0001174892604467459, "clip_ratio/region_mean": 0.002250609381007962, "epoch": 0.09930641840062067, "grad_norm": 0.1069716140627861, "learning_rate": 1e-06, "loss": 0.0297, "step": 1064 }, { "clip_ratio/high_max": 0.002789350524835754, "clip_ratio/high_mean": 0.001190496723211254, "clip_ratio/low_mean": 0.0012492367568484042, "clip_ratio/low_min": 8.338424322573701e-05, "clip_ratio/region_mean": 0.0024397334491368383, "epoch": 0.09939975150062125, "grad_norm": 0.11621293425559998, "learning_rate": 1e-06, "loss": 0.002, "step": 1065 }, { "clip_ratio/high_max": 0.0023896116981632076, "clip_ratio/high_mean": 0.0010325006915081758, "clip_ratio/low_mean": 0.0010244978693663143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020569985717884265, "epoch": 0.09949308460062183, "grad_norm": 0.11649531871080399, "learning_rate": 1e-06, "loss": 0.0025, "step": 1066 }, { "clip_ratio/high_max": 0.002675373980309814, "clip_ratio/high_mean": 0.001119227999879513, "clip_ratio/low_mean": 0.0010504302099434426, "clip_ratio/low_min": 0.00012363130736048333, "clip_ratio/region_mean": 0.002169658189814072, "epoch": 0.09958641770062242, "grad_norm": 0.11689122021198273, "learning_rate": 1e-06, "loss": 0.0476, "step": 1067 }, { "clip_ratio/high_max": 0.003016795228177216, "clip_ratio/high_mean": 0.001307435319176875, "clip_ratio/low_mean": 0.0010618365740810987, "clip_ratio/low_min": 9.151411177299451e-05, "clip_ratio/region_mean": 0.0023692718968959525, "epoch": 0.099679750800623, "grad_norm": 0.12360195815563202, "learning_rate": 1e-06, "loss": -0.0513, "step": 1068 }, { "clip_ratio/high_max": 0.0024809023343550507, "clip_ratio/high_mean": 0.0008891513189155376, "clip_ratio/low_mean": 0.0011078695497417357, "clip_ratio/low_min": 4.79866876048618e-05, "clip_ratio/region_mean": 0.001997020903218072, "epoch": 0.09977308390062359, "grad_norm": 0.09974703192710876, "learning_rate": 1e-06, "loss": 0.0155, "step": 1069 }, { "clip_ratio/high_max": 0.00273111074056942, "clip_ratio/high_mean": 0.001131101857026806, "clip_ratio/low_mean": 0.0012078444178769132, "clip_ratio/low_min": 0.00013443205170915462, "clip_ratio/region_mean": 0.002338946251256857, "epoch": 0.09986641700062417, "grad_norm": 0.11888189613819122, "learning_rate": 1e-06, "loss": 0.0427, "step": 1070 }, { "clip_ratio/high_max": 0.002887275615648832, "clip_ratio/high_mean": 0.0012772666341334116, "clip_ratio/low_mean": 0.0012335664723650552, "clip_ratio/low_min": 8.425303394687944e-05, "clip_ratio/region_mean": 0.002510833124688361, "epoch": 0.09995975010062474, "grad_norm": 0.12239677459001541, "learning_rate": 1e-06, "loss": 0.0015, "step": 1071 }, { "clip_ratio/high_max": 0.002579187144874595, "clip_ratio/high_mean": 0.0011054616479668766, "clip_ratio/low_mean": 0.0013819660089211538, "clip_ratio/low_min": 0.00012948144467372913, "clip_ratio/region_mean": 0.0024874277296476066, "epoch": 0.10005308320062534, "grad_norm": 0.12685957551002502, "learning_rate": 1e-06, "loss": 0.0405, "step": 1072 }, { "clip_ratio/high_max": 0.0025072550561162643, "clip_ratio/high_mean": 0.0011194720736966701, "clip_ratio/low_mean": 0.0010372953802288976, "clip_ratio/low_min": 5.5406057072104886e-05, "clip_ratio/region_mean": 0.0021567675212281756, "epoch": 0.10014641630062592, "grad_norm": 0.11538335680961609, "learning_rate": 1e-06, "loss": -0.0202, "step": 1073 }, { "clip_ratio/high_max": 0.0019684297330968548, "clip_ratio/high_mean": 0.0010255548240820644, "clip_ratio/low_mean": 0.0012416544414008968, "clip_ratio/low_min": 5.227260589890648e-05, "clip_ratio/region_mean": 0.002267209281853866, "epoch": 0.1002397494006265, "grad_norm": 0.11673750728368759, "learning_rate": 1e-06, "loss": 0.0124, "step": 1074 }, { "clip_ratio/high_max": 0.0024463470035698265, "clip_ratio/high_mean": 0.0010314030059817014, "clip_ratio/low_mean": 0.001159633702627616, "clip_ratio/low_min": 4.8209774831775576e-05, "clip_ratio/region_mean": 0.002191036743170116, "epoch": 0.10033308250062709, "grad_norm": 0.10937150567770004, "learning_rate": 1e-06, "loss": 0.0241, "step": 1075 }, { "clip_ratio/high_max": 0.002496567711204989, "clip_ratio/high_mean": 0.0011542028332769405, "clip_ratio/low_mean": 0.0010634353930072393, "clip_ratio/low_min": 3.797896351898089e-05, "clip_ratio/region_mean": 0.0022176381971803494, "epoch": 0.10042641560062766, "grad_norm": 0.10290578752756119, "learning_rate": 1e-06, "loss": -0.0173, "step": 1076 }, { "clip_ratio/high_max": 0.002785484801279381, "clip_ratio/high_mean": 0.0012426740868249908, "clip_ratio/low_mean": 0.0010342115492676385, "clip_ratio/low_min": 0.00011341815206833417, "clip_ratio/region_mean": 0.002276885621540714, "epoch": 0.10051974870062824, "grad_norm": 0.10617270320653915, "learning_rate": 1e-06, "loss": -0.01, "step": 1077 }, { "clip_ratio/high_max": 0.0029206702020019293, "clip_ratio/high_mean": 0.0012211155080876779, "clip_ratio/low_mean": 0.0010975272380164824, "clip_ratio/low_min": 3.594410009100102e-05, "clip_ratio/region_mean": 0.002318642713362351, "epoch": 0.10061308180062883, "grad_norm": 0.11008171737194061, "learning_rate": 1e-06, "loss": 0.0201, "step": 1078 }, { "clip_ratio/high_max": 0.002840934605046641, "clip_ratio/high_mean": 0.0010943061442958424, "clip_ratio/low_mean": 0.001305307261645794, "clip_ratio/low_min": 0.00020026239599246765, "clip_ratio/region_mean": 0.002399613324087113, "epoch": 0.10070641490062941, "grad_norm": 0.11556420475244522, "learning_rate": 1e-06, "loss": 0.089, "step": 1079 }, { "clip_ratio/high_max": 0.00236867959756637, "clip_ratio/high_mean": 0.0009672880096331937, "clip_ratio/low_mean": 0.0011380839096091222, "clip_ratio/low_min": 8.657145554025192e-05, "clip_ratio/region_mean": 0.0021053719538031146, "epoch": 0.10079974800063, "grad_norm": 0.10728225857019424, "learning_rate": 1e-06, "loss": 0.0295, "step": 1080 }, { "clip_ratio/high_max": 0.0025966270914068446, "clip_ratio/high_mean": 0.0010549810976954177, "clip_ratio/low_mean": 0.0011490706601762213, "clip_ratio/low_min": 0.0001105655105675396, "clip_ratio/region_mean": 0.0022040517869754694, "epoch": 0.10089308110063058, "grad_norm": 0.1247933879494667, "learning_rate": 1e-06, "loss": 0.0363, "step": 1081 }, { "clip_ratio/high_max": 0.0025589694996597245, "clip_ratio/high_mean": 0.0010329538617952494, "clip_ratio/low_mean": 0.0013034118928771932, "clip_ratio/low_min": 0.00015880875798757188, "clip_ratio/region_mean": 0.0023363656873698346, "epoch": 0.10098641420063116, "grad_norm": 0.11883296817541122, "learning_rate": 1e-06, "loss": 0.0471, "step": 1082 }, { "clip_ratio/high_max": 0.0024430182384094223, "clip_ratio/high_mean": 0.0009682027248345548, "clip_ratio/low_mean": 0.0013421356670733076, "clip_ratio/low_min": 0.00017714464684104314, "clip_ratio/region_mean": 0.0023103384155547246, "epoch": 0.10107974730063175, "grad_norm": 0.11756071448326111, "learning_rate": 1e-06, "loss": 0.0522, "step": 1083 }, { "clip_ratio/high_max": 0.0029525988284149207, "clip_ratio/high_mean": 0.0012053053505951539, "clip_ratio/low_mean": 0.0011951777023568866, "clip_ratio/low_min": 0.0001447406866645906, "clip_ratio/region_mean": 0.002400483026576694, "epoch": 0.10117308040063233, "grad_norm": 0.11909200996160507, "learning_rate": 1e-06, "loss": -0.0141, "step": 1084 }, { "clip_ratio/high_max": 0.0023794097505742684, "clip_ratio/high_mean": 0.0011081549528171308, "clip_ratio/low_mean": 0.001021991116431309, "clip_ratio/low_min": 6.935955752851442e-05, "clip_ratio/region_mean": 0.0021301460874383338, "epoch": 0.10126641350063291, "grad_norm": 0.11934073269367218, "learning_rate": 1e-06, "loss": 0.009, "step": 1085 }, { "clip_ratio/high_max": 0.0024595874128863215, "clip_ratio/high_mean": 0.0009960763054550625, "clip_ratio/low_mean": 0.0011105345147370826, "clip_ratio/low_min": 0.00012179011264379369, "clip_ratio/region_mean": 0.002106610882037785, "epoch": 0.1013597466006335, "grad_norm": 0.11388830095529556, "learning_rate": 1e-06, "loss": 0.0356, "step": 1086 }, { "clip_ratio/high_max": 0.0027253152074990794, "clip_ratio/high_mean": 0.0011164219868078362, "clip_ratio/low_mean": 0.0013074361777398735, "clip_ratio/low_min": 0.0001853404182838858, "clip_ratio/region_mean": 0.0024238581900135614, "epoch": 0.10145307970063408, "grad_norm": 0.11670181155204773, "learning_rate": 1e-06, "loss": 0.0754, "step": 1087 }, { "clip_ratio/high_max": 0.002694049384444952, "clip_ratio/high_mean": 0.0011580070895433892, "clip_ratio/low_mean": 0.001186084860819392, "clip_ratio/low_min": 5.331829197530169e-05, "clip_ratio/region_mean": 0.0023440919758286327, "epoch": 0.10154641280063466, "grad_norm": 0.12323874235153198, "learning_rate": 1e-06, "loss": 0.0213, "step": 1088 }, { "clip_ratio/high_max": 0.002950062364106998, "clip_ratio/high_mean": 0.001152325676230248, "clip_ratio/low_mean": 0.001114763621444581, "clip_ratio/low_min": 0.00010728094275691546, "clip_ratio/region_mean": 0.0022670893085887656, "epoch": 0.10163974590063525, "grad_norm": 0.11454440653324127, "learning_rate": 1e-06, "loss": 0.001, "step": 1089 }, { "clip_ratio/high_max": 0.002618478763906751, "clip_ratio/high_mean": 0.00103928150201682, "clip_ratio/low_mean": 0.0011396243608032819, "clip_ratio/low_min": 5.77608952880837e-05, "clip_ratio/region_mean": 0.0021789058664580807, "epoch": 0.10173307900063583, "grad_norm": 0.10497259348630905, "learning_rate": 1e-06, "loss": 0.0408, "step": 1090 }, { "clip_ratio/high_max": 0.003017491202626843, "clip_ratio/high_mean": 0.0011521638098201947, "clip_ratio/low_mean": 0.0011818067432614043, "clip_ratio/low_min": 0.00013988734826853033, "clip_ratio/region_mean": 0.0023339705148828216, "epoch": 0.10182641210063642, "grad_norm": 0.10692637413740158, "learning_rate": 1e-06, "loss": 0.0047, "step": 1091 }, { "clip_ratio/high_max": 0.002412800578895258, "clip_ratio/high_mean": 0.0009487999195698649, "clip_ratio/low_mean": 0.001282595903830952, "clip_ratio/low_min": 5.246302407613257e-05, "clip_ratio/region_mean": 0.002231395774288103, "epoch": 0.101919745200637, "grad_norm": 0.10796523094177246, "learning_rate": 1e-06, "loss": 0.05, "step": 1092 }, { "clip_ratio/high_max": 0.002751141888438724, "clip_ratio/high_mean": 0.0011391532025299966, "clip_ratio/low_mean": 0.0012838768197980244, "clip_ratio/low_min": 1.3586956811195705e-05, "clip_ratio/region_mean": 0.0024230299895862117, "epoch": 0.10201307830063758, "grad_norm": 0.11812913417816162, "learning_rate": 1e-06, "loss": 0.0268, "step": 1093 }, { "clip_ratio/high_max": 0.002707813720917329, "clip_ratio/high_mean": 0.0011065188191423658, "clip_ratio/low_mean": 0.00135228780345642, "clip_ratio/low_min": 3.724730049725622e-05, "clip_ratio/region_mean": 0.002458806637150701, "epoch": 0.10210641140063817, "grad_norm": 0.1216379702091217, "learning_rate": 1e-06, "loss": 0.0518, "step": 1094 }, { "clip_ratio/high_max": 0.003174927936925087, "clip_ratio/high_mean": 0.0013223898931755684, "clip_ratio/low_mean": 0.0013807560899294913, "clip_ratio/low_min": 3.065340933972038e-05, "clip_ratio/region_mean": 0.00270314601220889, "epoch": 0.10219974450063875, "grad_norm": 0.13937297463417053, "learning_rate": 1e-06, "loss": 0.0086, "step": 1095 }, { "clip_ratio/high_max": 0.002800120688334573, "clip_ratio/high_mean": 0.0012272596941329539, "clip_ratio/low_mean": 0.0013185914540372323, "clip_ratio/low_min": 0.00012680955660471227, "clip_ratio/region_mean": 0.0025458511227043346, "epoch": 0.10229307760063933, "grad_norm": 0.11575004458427429, "learning_rate": 1e-06, "loss": 0.0083, "step": 1096 }, { "clip_ratio/high_max": 0.0032278427679557353, "clip_ratio/high_mean": 0.001216051994560985, "clip_ratio/low_mean": 0.0015723384931334294, "clip_ratio/low_min": 0.0001675063685979694, "clip_ratio/region_mean": 0.0027883905422640964, "epoch": 0.10238641070063992, "grad_norm": 0.12887288630008698, "learning_rate": 1e-06, "loss": 0.021, "step": 1097 }, { "clip_ratio/high_max": 0.0025176868948619813, "clip_ratio/high_mean": 0.0010847801859199535, "clip_ratio/low_mean": 0.001257452695426764, "clip_ratio/low_min": 0.00013740182657784317, "clip_ratio/region_mean": 0.0023422329395543784, "epoch": 0.1024797438006405, "grad_norm": 0.10793988406658173, "learning_rate": 1e-06, "loss": 0.0067, "step": 1098 }, { "clip_ratio/high_max": 0.0028035539071424864, "clip_ratio/high_mean": 0.001135479749791557, "clip_ratio/low_mean": 0.0011583281884668395, "clip_ratio/low_min": 0.00013805752678308636, "clip_ratio/region_mean": 0.002293807905516587, "epoch": 0.10257307690064109, "grad_norm": 0.12659579515457153, "learning_rate": 1e-06, "loss": 0.0069, "step": 1099 }, { "clip_ratio/high_max": 0.0026034199618152343, "clip_ratio/high_mean": 0.0009543860342091648, "clip_ratio/low_mean": 0.0011771291501645464, "clip_ratio/low_min": 7.552693477919092e-05, "clip_ratio/region_mean": 0.0021315151898306794, "epoch": 0.10266641000064167, "grad_norm": 0.09649745374917984, "learning_rate": 1e-06, "loss": 0.0303, "step": 1100 }, { "clip_ratio/high_max": 0.0031048729506437667, "clip_ratio/high_mean": 0.0012960248004674213, "clip_ratio/low_mean": 0.0013950432803540025, "clip_ratio/low_min": 0.00017725034558679909, "clip_ratio/region_mean": 0.002691068089916371, "epoch": 0.10275974310064225, "grad_norm": 0.12436746060848236, "learning_rate": 1e-06, "loss": -0.0176, "step": 1101 }, { "clip_ratio/high_max": 0.0025201681528415065, "clip_ratio/high_mean": 0.0011660555574053433, "clip_ratio/low_mean": 0.001305936457356438, "clip_ratio/low_min": 0.00014024285155755933, "clip_ratio/region_mean": 0.00247199201839976, "epoch": 0.10285307620064284, "grad_norm": 0.13194289803504944, "learning_rate": 1e-06, "loss": 0.0371, "step": 1102 }, { "clip_ratio/high_max": 0.002811998245306313, "clip_ratio/high_mean": 0.001105553057641373, "clip_ratio/low_mean": 0.0012419717295415467, "clip_ratio/low_min": 8.691742641531164e-05, "clip_ratio/region_mean": 0.0023475248235627078, "epoch": 0.10294640930064342, "grad_norm": 0.11174826323986053, "learning_rate": 1e-06, "loss": 0.0444, "step": 1103 }, { "clip_ratio/high_max": 0.0029067390169075225, "clip_ratio/high_mean": 0.0010887072148761945, "clip_ratio/low_mean": 0.0013747961238550488, "clip_ratio/low_min": 0.00014209014534571907, "clip_ratio/region_mean": 0.0024635033769300207, "epoch": 0.103039742400644, "grad_norm": 0.11580396443605423, "learning_rate": 1e-06, "loss": 0.0685, "step": 1104 }, { "clip_ratio/high_max": 0.0025924588553607464, "clip_ratio/high_mean": 0.0011545375673449598, "clip_ratio/low_mean": 0.0010833912219823105, "clip_ratio/low_min": 5.3648069297196344e-05, "clip_ratio/region_mean": 0.002237928711110726, "epoch": 0.10313307550064459, "grad_norm": 0.12212143838405609, "learning_rate": 1e-06, "loss": 0.0017, "step": 1105 }, { "clip_ratio/high_max": 0.0029773897913401015, "clip_ratio/high_mean": 0.001277524384931894, "clip_ratio/low_mean": 0.0012416719255270436, "clip_ratio/low_min": 0.00013018519166507758, "clip_ratio/region_mean": 0.0025191962704411708, "epoch": 0.10322640860064516, "grad_norm": 0.11835592985153198, "learning_rate": 1e-06, "loss": 0.0512, "step": 1106 }, { "clip_ratio/high_max": 0.002519008434319403, "clip_ratio/high_mean": 0.0010240841293125413, "clip_ratio/low_mean": 0.0011124684879177948, "clip_ratio/low_min": 1.1299945981591009e-05, "clip_ratio/region_mean": 0.002136552648153156, "epoch": 0.10331974170064574, "grad_norm": 0.1100822240114212, "learning_rate": 1e-06, "loss": 0.0211, "step": 1107 }, { "clip_ratio/high_max": 0.003239823978219647, "clip_ratio/high_mean": 0.0012647863622987643, "clip_ratio/low_mean": 0.0011455011917860247, "clip_ratio/low_min": 3.245824973419076e-05, "clip_ratio/region_mean": 0.0024102876050164923, "epoch": 0.10341307480064633, "grad_norm": 0.12142063677310944, "learning_rate": 1e-06, "loss": 0.0479, "step": 1108 }, { "clip_ratio/high_max": 0.0024605692160548642, "clip_ratio/high_mean": 0.0011022895414498635, "clip_ratio/low_mean": 0.0011044156453863252, "clip_ratio/low_min": 0.00010550630213401746, "clip_ratio/region_mean": 0.002206705161370337, "epoch": 0.10350640790064691, "grad_norm": 0.11533405631780624, "learning_rate": 1e-06, "loss": 0.0537, "step": 1109 }, { "clip_ratio/high_max": 0.002634015130752232, "clip_ratio/high_mean": 0.0010467530919413548, "clip_ratio/low_mean": 0.0012135065953771118, "clip_ratio/low_min": 0.00010743216807895806, "clip_ratio/region_mean": 0.002260259680042509, "epoch": 0.1035997410006475, "grad_norm": 0.11456581950187683, "learning_rate": 1e-06, "loss": 0.0332, "step": 1110 }, { "clip_ratio/high_max": 0.003077726694755256, "clip_ratio/high_mean": 0.0013154583939467557, "clip_ratio/low_mean": 0.0010996196888299892, "clip_ratio/low_min": 1.982553476409521e-05, "clip_ratio/region_mean": 0.0024150780591298826, "epoch": 0.10369307410064808, "grad_norm": 0.11310114711523056, "learning_rate": 1e-06, "loss": -0.0016, "step": 1111 }, { "clip_ratio/high_max": 0.0026963762429659255, "clip_ratio/high_mean": 0.0011136175235151313, "clip_ratio/low_mean": 0.0012948331313964445, "clip_ratio/low_min": 0.00014351652498589829, "clip_ratio/region_mean": 0.0024084506221697666, "epoch": 0.10378640720064866, "grad_norm": 0.11605637520551682, "learning_rate": 1e-06, "loss": 0.0304, "step": 1112 }, { "clip_ratio/high_max": 0.0028868283989140764, "clip_ratio/high_mean": 0.0011918912623514188, "clip_ratio/low_mean": 0.0012752842994814273, "clip_ratio/low_min": 0.00010052474772237474, "clip_ratio/region_mean": 0.0024671755454619415, "epoch": 0.10387974030064925, "grad_norm": 0.12642115354537964, "learning_rate": 1e-06, "loss": 0.0358, "step": 1113 }, { "clip_ratio/high_max": 0.00247761289938353, "clip_ratio/high_mean": 0.0010557337609498063, "clip_ratio/low_mean": 0.001374893054162385, "clip_ratio/low_min": 0.0001848703104769811, "clip_ratio/region_mean": 0.0024306268023792654, "epoch": 0.10397307340064983, "grad_norm": 0.11440667510032654, "learning_rate": 1e-06, "loss": 0.0286, "step": 1114 }, { "clip_ratio/high_max": 0.003179244667990133, "clip_ratio/high_mean": 0.0012934274636791088, "clip_ratio/low_mean": 0.0013286632456583902, "clip_ratio/low_min": 0.00010015191583079286, "clip_ratio/region_mean": 0.0026220906947855838, "epoch": 0.10406640650065041, "grad_norm": 0.11868170648813248, "learning_rate": 1e-06, "loss": 0.0189, "step": 1115 }, { "clip_ratio/high_max": 0.0024273158123833127, "clip_ratio/high_mean": 0.0010362270386394812, "clip_ratio/low_mean": 0.0013252519311208744, "clip_ratio/low_min": 0.00016632347433187533, "clip_ratio/region_mean": 0.0023614789388375357, "epoch": 0.104159739600651, "grad_norm": 0.09607911854982376, "learning_rate": 1e-06, "loss": 0.0317, "step": 1116 }, { "clip_ratio/high_max": 0.003292489651357755, "clip_ratio/high_mean": 0.001346397039014846, "clip_ratio/low_mean": 0.0013972801534691826, "clip_ratio/low_min": 0.00013283076623338275, "clip_ratio/region_mean": 0.00274367712700041, "epoch": 0.10425307270065158, "grad_norm": 0.12791144847869873, "learning_rate": 1e-06, "loss": 0.0292, "step": 1117 }, { "clip_ratio/high_max": 0.002678358861885499, "clip_ratio/high_mean": 0.0011418829817557707, "clip_ratio/low_mean": 0.001304392222664319, "clip_ratio/low_min": 0.00011797684510383988, "clip_ratio/region_mean": 0.0024462751534883864, "epoch": 0.10434640580065216, "grad_norm": 0.12478093057870865, "learning_rate": 1e-06, "loss": 0.0169, "step": 1118 }, { "clip_ratio/high_max": 0.0028471623809309676, "clip_ratio/high_mean": 0.0012985945941181853, "clip_ratio/low_mean": 0.0014803677913732827, "clip_ratio/low_min": 0.00014858556005492574, "clip_ratio/region_mean": 0.00277896234911168, "epoch": 0.10443973890065275, "grad_norm": 0.11772139370441437, "learning_rate": 1e-06, "loss": 0.0053, "step": 1119 }, { "clip_ratio/high_max": 0.0024437717511318624, "clip_ratio/high_mean": 0.0011579339370655362, "clip_ratio/low_mean": 0.0014704883178637829, "clip_ratio/low_min": 0.00026892401911027264, "clip_ratio/region_mean": 0.002628422196721658, "epoch": 0.10453307200065333, "grad_norm": 0.12378618121147156, "learning_rate": 1e-06, "loss": 0.0506, "step": 1120 }, { "clip_ratio/high_max": 0.003035665096831508, "clip_ratio/high_mean": 0.0011799336498370394, "clip_ratio/low_mean": 0.0013901290294597857, "clip_ratio/low_min": 0.00011342339439579519, "clip_ratio/region_mean": 0.0025700626938487403, "epoch": 0.10462640510065392, "grad_norm": 0.10745785385370255, "learning_rate": 1e-06, "loss": 0.0094, "step": 1121 }, { "clip_ratio/high_max": 0.002812719343637582, "clip_ratio/high_mean": 0.0011656712122203317, "clip_ratio/low_mean": 0.0011819880601251498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023476592541555874, "epoch": 0.1047197382006545, "grad_norm": 0.09620406478643417, "learning_rate": 1e-06, "loss": 0.0269, "step": 1122 }, { "clip_ratio/high_max": 0.0030243383735069074, "clip_ratio/high_mean": 0.001192717549201916, "clip_ratio/low_mean": 0.0011905831925105304, "clip_ratio/low_min": 3.1554272027278785e-05, "clip_ratio/region_mean": 0.0023833007071516477, "epoch": 0.10481307130065508, "grad_norm": 0.12845869362354279, "learning_rate": 1e-06, "loss": 0.0218, "step": 1123 }, { "clip_ratio/high_max": 0.002959868113975972, "clip_ratio/high_mean": 0.0013269581268104957, "clip_ratio/low_mean": 0.0013991171654197387, "clip_ratio/low_min": 0.00014159019065118628, "clip_ratio/region_mean": 0.002726075290411245, "epoch": 0.10490640440065567, "grad_norm": 0.11898820102214813, "learning_rate": 1e-06, "loss": 0.0118, "step": 1124 }, { "clip_ratio/high_max": 0.002799288362439256, "clip_ratio/high_mean": 0.0011499168358568568, "clip_ratio/low_mean": 0.0013406696889433078, "clip_ratio/low_min": 0.00011278265174041735, "clip_ratio/region_mean": 0.0024905865429900587, "epoch": 0.10499973750065625, "grad_norm": 0.11534126102924347, "learning_rate": 1e-06, "loss": 0.0323, "step": 1125 }, { "clip_ratio/high_max": 0.0026708424265962094, "clip_ratio/high_mean": 0.0011207122952328064, "clip_ratio/low_mean": 0.001268258009076817, "clip_ratio/low_min": 4.435517075762618e-05, "clip_ratio/region_mean": 0.0023889703152235597, "epoch": 0.10509307060065683, "grad_norm": 0.11435788869857788, "learning_rate": 1e-06, "loss": 0.0388, "step": 1126 }, { "clip_ratio/high_max": 0.002856292532669613, "clip_ratio/high_mean": 0.0011401038645999506, "clip_ratio/low_mean": 0.0013795285594824236, "clip_ratio/low_min": 0.00020529696848825552, "clip_ratio/region_mean": 0.002519632500479929, "epoch": 0.10518640370065742, "grad_norm": 0.1270683854818344, "learning_rate": 1e-06, "loss": 0.052, "step": 1127 }, { "clip_ratio/high_max": 0.002971828267618548, "clip_ratio/high_mean": 0.0012049351826135535, "clip_ratio/low_mean": 0.0011503495697979815, "clip_ratio/low_min": 1.3799955922877416e-05, "clip_ratio/region_mean": 0.002355284770601429, "epoch": 0.105279736800658, "grad_norm": 0.10787615925073624, "learning_rate": 1e-06, "loss": 0.0037, "step": 1128 }, { "clip_ratio/high_max": 0.002530510682845488, "clip_ratio/high_mean": 0.0009791489810595522, "clip_ratio/low_mean": 0.0014299782378657255, "clip_ratio/low_min": 0.00022141271983855404, "clip_ratio/region_mean": 0.0024091272134683095, "epoch": 0.10537306990065858, "grad_norm": 0.12056026607751846, "learning_rate": 1e-06, "loss": 0.0883, "step": 1129 }, { "clip_ratio/high_max": 0.0027782598335761577, "clip_ratio/high_mean": 0.001321903331700014, "clip_ratio/low_mean": 0.0011844348191516474, "clip_ratio/low_min": 8.406902088609058e-05, "clip_ratio/region_mean": 0.0025063381326617673, "epoch": 0.10546640300065917, "grad_norm": 0.11933311820030212, "learning_rate": 1e-06, "loss": 0.006, "step": 1130 }, { "clip_ratio/high_max": 0.002532692094973754, "clip_ratio/high_mean": 0.001146624073953717, "clip_ratio/low_mean": 0.0014163031555654015, "clip_ratio/low_min": 0.00013558315549744293, "clip_ratio/region_mean": 0.0025629272422520444, "epoch": 0.10555973610065975, "grad_norm": 0.11837572604417801, "learning_rate": 1e-06, "loss": 0.0297, "step": 1131 }, { "clip_ratio/high_max": 0.003131964367639739, "clip_ratio/high_mean": 0.0011996909161098301, "clip_ratio/low_mean": 0.0013013777788728476, "clip_ratio/low_min": 4.4867192627862096e-05, "clip_ratio/region_mean": 0.0025010687022586353, "epoch": 0.10565306920066034, "grad_norm": 0.11814764142036438, "learning_rate": 1e-06, "loss": 0.0083, "step": 1132 }, { "clip_ratio/high_max": 0.002500061586033553, "clip_ratio/high_mean": 0.0010158518598473165, "clip_ratio/low_mean": 0.001411313784046797, "clip_ratio/low_min": 0.0001168877279269509, "clip_ratio/region_mean": 0.0024271656657219864, "epoch": 0.10574640230066092, "grad_norm": 0.24052585661411285, "learning_rate": 1e-06, "loss": 0.0347, "step": 1133 }, { "clip_ratio/high_max": 0.003017571732925717, "clip_ratio/high_mean": 0.0013358462165342644, "clip_ratio/low_mean": 0.0016696455313649494, "clip_ratio/low_min": 0.00012916761716041947, "clip_ratio/region_mean": 0.0030054917297093198, "epoch": 0.1058397354006615, "grad_norm": 0.13456502556800842, "learning_rate": 1e-06, "loss": 0.0073, "step": 1134 }, { "clip_ratio/high_max": 0.002609604620374739, "clip_ratio/high_mean": 0.0010941214168269653, "clip_ratio/low_mean": 0.0012300178677833173, "clip_ratio/low_min": 9.737022537592566e-05, "clip_ratio/region_mean": 0.0023241393209900707, "epoch": 0.10593306850066209, "grad_norm": 0.11512071639299393, "learning_rate": 1e-06, "loss": 0.011, "step": 1135 }, { "clip_ratio/high_max": 0.0027544667827896774, "clip_ratio/high_mean": 0.0011002486317011062, "clip_ratio/low_mean": 0.0011712760096997954, "clip_ratio/low_min": 8.004349729162641e-05, "clip_ratio/region_mean": 0.0022715246013831347, "epoch": 0.10602640160066266, "grad_norm": 0.10703384131193161, "learning_rate": 1e-06, "loss": 0.0482, "step": 1136 }, { "clip_ratio/high_max": 0.0026286660868208855, "clip_ratio/high_mean": 0.001149256895587314, "clip_ratio/low_mean": 0.0013963623641757295, "clip_ratio/low_min": 0.00022232529045140836, "clip_ratio/region_mean": 0.002545619245211128, "epoch": 0.10611973470066324, "grad_norm": 0.11007598787546158, "learning_rate": 1e-06, "loss": 0.0356, "step": 1137 }, { "clip_ratio/high_max": 0.002739051531534642, "clip_ratio/high_mean": 0.0010739501085481606, "clip_ratio/low_mean": 0.0013600182355730794, "clip_ratio/low_min": 3.835406005237019e-05, "clip_ratio/region_mean": 0.0024339684532606043, "epoch": 0.10621306780066384, "grad_norm": 0.12848563492298126, "learning_rate": 1e-06, "loss": 0.0722, "step": 1138 }, { "clip_ratio/high_max": 0.002559735548857134, "clip_ratio/high_mean": 0.0010289772699252353, "clip_ratio/low_mean": 0.0014918754186510341, "clip_ratio/low_min": 0.00017435932386433706, "clip_ratio/region_mean": 0.0025208526640199125, "epoch": 0.10630640090066441, "grad_norm": 0.11483762413263321, "learning_rate": 1e-06, "loss": 0.0353, "step": 1139 }, { "clip_ratio/high_max": 0.0029175181116443127, "clip_ratio/high_mean": 0.001080374262528494, "clip_ratio/low_mean": 0.0013497868967533577, "clip_ratio/low_min": 0.0001887104444904253, "clip_ratio/region_mean": 0.0024301611047121696, "epoch": 0.10639973400066499, "grad_norm": 0.10692422091960907, "learning_rate": 1e-06, "loss": 0.0379, "step": 1140 }, { "clip_ratio/high_max": 0.002700121905945707, "clip_ratio/high_mean": 0.0011649370680970605, "clip_ratio/low_mean": 0.0012081735185347497, "clip_ratio/low_min": 4.405792060424574e-05, "clip_ratio/region_mean": 0.002373110633925535, "epoch": 0.10649306710066558, "grad_norm": 0.11575724184513092, "learning_rate": 1e-06, "loss": 0.0079, "step": 1141 }, { "clip_ratio/high_max": 0.00306388007447822, "clip_ratio/high_mean": 0.0012905216790386476, "clip_ratio/low_mean": 0.001202979481604416, "clip_ratio/low_min": 0.00012522148972493596, "clip_ratio/region_mean": 0.002493501131539233, "epoch": 0.10658640020066616, "grad_norm": 0.11531451344490051, "learning_rate": 1e-06, "loss": 0.0019, "step": 1142 }, { "clip_ratio/high_max": 0.0030013798459549434, "clip_ratio/high_mean": 0.001199832830025116, "clip_ratio/low_mean": 0.0012669624702539295, "clip_ratio/low_min": 6.315956125035882e-05, "clip_ratio/region_mean": 0.0024667953330208547, "epoch": 0.10667973330066675, "grad_norm": 0.12019931524991989, "learning_rate": 1e-06, "loss": 0.0192, "step": 1143 }, { "clip_ratio/high_max": 0.0023712478287052363, "clip_ratio/high_mean": 0.0009431351518287556, "clip_ratio/low_mean": 0.0013089040967315668, "clip_ratio/low_min": 4.538583652902162e-05, "clip_ratio/region_mean": 0.002252039223094471, "epoch": 0.10677306640066733, "grad_norm": 0.14286024868488312, "learning_rate": 1e-06, "loss": 0.0554, "step": 1144 }, { "clip_ratio/high_max": 0.0029089252711855806, "clip_ratio/high_mean": 0.0012521858698164579, "clip_ratio/low_mean": 0.0011544116096047219, "clip_ratio/low_min": 4.719277058029547e-05, "clip_ratio/region_mean": 0.0024065975376288407, "epoch": 0.10686639950066791, "grad_norm": 0.14004789292812347, "learning_rate": 1e-06, "loss": 0.0062, "step": 1145 }, { "clip_ratio/high_max": 0.0030094570392975584, "clip_ratio/high_mean": 0.001335574190306943, "clip_ratio/low_mean": 0.0015135196408664342, "clip_ratio/low_min": 0.00018862119850382442, "clip_ratio/region_mean": 0.002849093834811356, "epoch": 0.1069597326006685, "grad_norm": 0.10762453824281693, "learning_rate": 1e-06, "loss": 0.0174, "step": 1146 }, { "clip_ratio/high_max": 0.0025431940230191685, "clip_ratio/high_mean": 0.001127167975937482, "clip_ratio/low_mean": 0.001422966699465178, "clip_ratio/low_min": 0.0001494800417276565, "clip_ratio/region_mean": 0.002550134580815211, "epoch": 0.10705306570066908, "grad_norm": 0.11267776787281036, "learning_rate": 1e-06, "loss": 0.0436, "step": 1147 }, { "clip_ratio/high_max": 0.0023202954907901585, "clip_ratio/high_mean": 0.0010011605481849983, "clip_ratio/low_mean": 0.0012671764779952355, "clip_ratio/low_min": 0.00012124216209485894, "clip_ratio/region_mean": 0.002268337018904276, "epoch": 0.10714639880066966, "grad_norm": 0.11383038014173508, "learning_rate": 1e-06, "loss": 0.054, "step": 1148 }, { "clip_ratio/high_max": 0.002680772333405912, "clip_ratio/high_mean": 0.0011563459120225161, "clip_ratio/low_mean": 0.0013334371869859751, "clip_ratio/low_min": 0.00015424608773173532, "clip_ratio/region_mean": 0.0024897830517147668, "epoch": 0.10723973190067025, "grad_norm": 0.11772894114255905, "learning_rate": 1e-06, "loss": 0.0464, "step": 1149 }, { "clip_ratio/high_max": 0.0027182163175893947, "clip_ratio/high_mean": 0.001158545182988746, "clip_ratio/low_mean": 0.0012444767053239048, "clip_ratio/low_min": 3.792418738157721e-05, "clip_ratio/region_mean": 0.002403021884674672, "epoch": 0.10733306500067083, "grad_norm": 0.11122532933950424, "learning_rate": 1e-06, "loss": 0.041, "step": 1150 }, { "clip_ratio/high_max": 0.003199928018148057, "clip_ratio/high_mean": 0.0012628374388441443, "clip_ratio/low_mean": 0.0014054879393370356, "clip_ratio/low_min": 0.00015092244575498626, "clip_ratio/region_mean": 0.0026683254036470316, "epoch": 0.10742639810067142, "grad_norm": 0.1107155978679657, "learning_rate": 1e-06, "loss": 0.0533, "step": 1151 }, { "clip_ratio/high_max": 0.0025988160050474107, "clip_ratio/high_mean": 0.001165936706456705, "clip_ratio/low_mean": 0.0012795019210898317, "clip_ratio/low_min": 0.0001081228319890215, "clip_ratio/region_mean": 0.0024454386366414838, "epoch": 0.107519731200672, "grad_norm": 0.12099910527467728, "learning_rate": 1e-06, "loss": 0.0114, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009722028459821397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 605.1776123046875, "completions/mean_terminated_length": 570.906494140625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.10761306430067258, "grad_norm": 0.11977685987949371, "learning_rate": 1e-06, "loss": 0.0285, "num_tokens": 809310308.0, "reward": 0.6238664984703064, "reward_std": 0.17871810495853424, "rewards/simpleverify_reward/mean": 0.6238664984703064, "rewards/simpleverify_reward/std": 0.4844163954257965, "step": 1153 }, { "clip_ratio/high_max": 0.0022051456398912705, "clip_ratio/high_mean": 0.0008482579705741955, "clip_ratio/low_mean": 0.0005361665589589393, "clip_ratio/low_min": 4.130187153350562e-05, "clip_ratio/region_mean": 0.0013844245549989864, "epoch": 0.10770639740067317, "grad_norm": 0.11634834110736847, "learning_rate": 1e-06, "loss": 0.0269, "step": 1154 }, { "clip_ratio/high_max": 0.0020037503163621295, "clip_ratio/high_mean": 0.0007370135535893496, "clip_ratio/low_mean": 0.0005831434064020868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001320156985457288, "epoch": 0.10779973050067375, "grad_norm": 0.12227138131856918, "learning_rate": 1e-06, "loss": 0.0196, "step": 1155 }, { "clip_ratio/high_max": 0.0017835945836850442, "clip_ratio/high_mean": 0.0007434545259457082, "clip_ratio/low_mean": 0.0005530421085495618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012964966299477965, "epoch": 0.10789306360067433, "grad_norm": 0.11062200367450714, "learning_rate": 1e-06, "loss": 0.007, "step": 1156 }, { "clip_ratio/high_max": 0.002101159185258439, "clip_ratio/high_mean": 0.000812598049378721, "clip_ratio/low_mean": 0.0007404301886708708, "clip_ratio/low_min": 5.102363502373919e-05, "clip_ratio/region_mean": 0.0015530282180407085, "epoch": 0.10798639670067492, "grad_norm": 0.1311548352241516, "learning_rate": 1e-06, "loss": 0.0564, "step": 1157 }, { "clip_ratio/high_max": 0.0020240997400833294, "clip_ratio/high_mean": 0.0007514389690186363, "clip_ratio/low_mean": 0.0007478377483494114, "clip_ratio/low_min": 8.254503836724325e-05, "clip_ratio/region_mean": 0.0014992767028161325, "epoch": 0.1080797298006755, "grad_norm": 0.11050847917795181, "learning_rate": 1e-06, "loss": 0.0306, "step": 1158 }, { "clip_ratio/high_max": 0.0021855211671208963, "clip_ratio/high_mean": 0.0009592939786671195, "clip_ratio/low_mean": 0.0007354513527388917, "clip_ratio/low_min": 1.4010312042955775e-05, "clip_ratio/region_mean": 0.0016947453477769159, "epoch": 0.10817306290067608, "grad_norm": 0.11717856675386429, "learning_rate": 1e-06, "loss": -0.0243, "step": 1159 }, { "clip_ratio/high_max": 0.002265210474433843, "clip_ratio/high_mean": 0.0010114832512044813, "clip_ratio/low_mean": 0.000883542104929802, "clip_ratio/low_min": 0.00010992027819156647, "clip_ratio/region_mean": 0.0018950253070215695, "epoch": 0.10826639600067667, "grad_norm": 0.11830315738916397, "learning_rate": 1e-06, "loss": 0.0253, "step": 1160 }, { "clip_ratio/high_max": 0.0019347222842043266, "clip_ratio/high_mean": 0.0008118970181385521, "clip_ratio/low_mean": 0.0010327022901037708, "clip_ratio/low_min": 8.361271375179058e-05, "clip_ratio/region_mean": 0.0018445992755005136, "epoch": 0.10835972910067725, "grad_norm": 0.12328173965215683, "learning_rate": 1e-06, "loss": 0.0506, "step": 1161 }, { "clip_ratio/high_max": 0.002106198477122234, "clip_ratio/high_mean": 0.0009211926753778243, "clip_ratio/low_mean": 0.0008750574124860577, "clip_ratio/low_min": 0.00012926964427606435, "clip_ratio/region_mean": 0.0017962500642170198, "epoch": 0.10845306220067784, "grad_norm": 0.11745533347129822, "learning_rate": 1e-06, "loss": 0.0135, "step": 1162 }, { "clip_ratio/high_max": 0.0024538260731787886, "clip_ratio/high_mean": 0.0009322658333985601, "clip_ratio/low_mean": 0.0009371481701236917, "clip_ratio/low_min": 7.019098757155007e-05, "clip_ratio/region_mean": 0.001869413990789326, "epoch": 0.10854639530067842, "grad_norm": 0.12173112481832504, "learning_rate": 1e-06, "loss": 0.008, "step": 1163 }, { "clip_ratio/high_max": 0.0017985076483455487, "clip_ratio/high_mean": 0.0008000890466064448, "clip_ratio/low_mean": 0.001189785387396114, "clip_ratio/low_min": 0.0001357658202323364, "clip_ratio/region_mean": 0.001989874435821548, "epoch": 0.108639728400679, "grad_norm": 0.19829334318637848, "learning_rate": 1e-06, "loss": 0.049, "step": 1164 }, { "clip_ratio/high_max": 0.002381717160460539, "clip_ratio/high_mean": 0.0008635516369395191, "clip_ratio/low_mean": 0.0009943249970092438, "clip_ratio/low_min": 7.623442797921598e-05, "clip_ratio/region_mean": 0.0018578766248538159, "epoch": 0.10873306150067959, "grad_norm": 0.12362787872552872, "learning_rate": 1e-06, "loss": 0.0424, "step": 1165 }, { "clip_ratio/high_max": 0.0021821201153215952, "clip_ratio/high_mean": 0.0008784778801782522, "clip_ratio/low_mean": 0.0009237140584446024, "clip_ratio/low_min": 3.912757256330224e-05, "clip_ratio/region_mean": 0.0018021919458988123, "epoch": 0.10882639460068017, "grad_norm": 0.122602678835392, "learning_rate": 1e-06, "loss": 0.0383, "step": 1166 }, { "clip_ratio/high_max": 0.001917199326271657, "clip_ratio/high_mean": 0.0008443322867606184, "clip_ratio/low_mean": 0.0008314452061313204, "clip_ratio/low_min": 2.09067593459622e-05, "clip_ratio/region_mean": 0.001675777428317815, "epoch": 0.10891972770068074, "grad_norm": 0.11294093728065491, "learning_rate": 1e-06, "loss": 0.0112, "step": 1167 }, { "clip_ratio/high_max": 0.0022509836562676355, "clip_ratio/high_mean": 0.0009490211305092089, "clip_ratio/low_mean": 0.0009287308639613912, "clip_ratio/low_min": 3.8909884096938185e-05, "clip_ratio/region_mean": 0.0018777520162984729, "epoch": 0.10901306080068134, "grad_norm": 0.11199970543384552, "learning_rate": 1e-06, "loss": 0.0256, "step": 1168 }, { "clip_ratio/high_max": 0.0022230954527913127, "clip_ratio/high_mean": 0.0009541055169393076, "clip_ratio/low_mean": 0.0008088467002380639, "clip_ratio/low_min": 2.954328738269396e-05, "clip_ratio/region_mean": 0.0017629522117204033, "epoch": 0.10910639390068191, "grad_norm": 0.11580251902341843, "learning_rate": 1e-06, "loss": 0.0255, "step": 1169 }, { "clip_ratio/high_max": 0.002369122223171871, "clip_ratio/high_mean": 0.0008771092525421409, "clip_ratio/low_mean": 0.0010278150539306807, "clip_ratio/low_min": 9.947158287104685e-05, "clip_ratio/region_mean": 0.001904924305563327, "epoch": 0.10919972700068249, "grad_norm": 0.1623285710811615, "learning_rate": 1e-06, "loss": 0.0332, "step": 1170 }, { "clip_ratio/high_max": 0.0021901287545915693, "clip_ratio/high_mean": 0.0008944836845330428, "clip_ratio/low_mean": 0.0010959301725961268, "clip_ratio/low_min": 0.00015705118130426854, "clip_ratio/region_mean": 0.001990413846215233, "epoch": 0.10929306010068308, "grad_norm": 0.11249013245105743, "learning_rate": 1e-06, "loss": 0.0681, "step": 1171 }, { "clip_ratio/high_max": 0.0022758879204047844, "clip_ratio/high_mean": 0.0009372177155455574, "clip_ratio/low_mean": 0.0011285629698249977, "clip_ratio/low_min": 7.89888863437227e-05, "clip_ratio/region_mean": 0.002065780685370555, "epoch": 0.10938639320068366, "grad_norm": 0.11011063307523727, "learning_rate": 1e-06, "loss": 0.035, "step": 1172 }, { "clip_ratio/high_max": 0.002339449478313327, "clip_ratio/high_mean": 0.001067694782250328, "clip_ratio/low_mean": 0.000992456285530352, "clip_ratio/low_min": 0.00017182131250592647, "clip_ratio/region_mean": 0.002060151062323712, "epoch": 0.10947972630068425, "grad_norm": 0.12196346372365952, "learning_rate": 1e-06, "loss": -0.0016, "step": 1173 }, { "clip_ratio/high_max": 0.0024089510916383006, "clip_ratio/high_mean": 0.0010303717499482445, "clip_ratio/low_mean": 0.0008754592508921633, "clip_ratio/low_min": 7.280913996510208e-05, "clip_ratio/region_mean": 0.0019058309990214184, "epoch": 0.10957305940068483, "grad_norm": 0.12614992260932922, "learning_rate": 1e-06, "loss": 0.0249, "step": 1174 }, { "clip_ratio/high_max": 0.002716060036618728, "clip_ratio/high_mean": 0.0010410269733256428, "clip_ratio/low_mean": 0.0009715198139019776, "clip_ratio/low_min": 8.22897745820228e-05, "clip_ratio/region_mean": 0.002012546799960546, "epoch": 0.10966639250068541, "grad_norm": 0.11459046602249146, "learning_rate": 1e-06, "loss": 0.0375, "step": 1175 }, { "clip_ratio/high_max": 0.0025704871004563756, "clip_ratio/high_mean": 0.000968531661783345, "clip_ratio/low_mean": 0.0010568624966253992, "clip_ratio/low_min": 0.00010840465347428108, "clip_ratio/region_mean": 0.002025394198426511, "epoch": 0.109759725600686, "grad_norm": 0.13366645574569702, "learning_rate": 1e-06, "loss": 0.035, "step": 1176 }, { "clip_ratio/high_max": 0.0026539053724263795, "clip_ratio/high_mean": 0.0010798767252708785, "clip_ratio/low_mean": 0.0009525927907816367, "clip_ratio/low_min": 9.907875755743589e-05, "clip_ratio/region_mean": 0.0020324695433373563, "epoch": 0.10985305870068658, "grad_norm": 0.11082327365875244, "learning_rate": 1e-06, "loss": -0.0195, "step": 1177 }, { "clip_ratio/high_max": 0.002270565673825331, "clip_ratio/high_mean": 0.0009582634193066042, "clip_ratio/low_mean": 0.0009140212441707263, "clip_ratio/low_min": 0.00010293228569935309, "clip_ratio/region_mean": 0.0018722846507444046, "epoch": 0.10994639180068716, "grad_norm": 0.11979391425848007, "learning_rate": 1e-06, "loss": 0.0221, "step": 1178 }, { "clip_ratio/high_max": 0.0026901003657258116, "clip_ratio/high_mean": 0.0010208458297711331, "clip_ratio/low_mean": 0.0011379824045434361, "clip_ratio/low_min": 5.9258822147967294e-05, "clip_ratio/region_mean": 0.0021588282907032408, "epoch": 0.11003972490068775, "grad_norm": 0.11636406928300858, "learning_rate": 1e-06, "loss": 0.064, "step": 1179 }, { "clip_ratio/high_max": 0.002761453826678917, "clip_ratio/high_mean": 0.0010801016542245634, "clip_ratio/low_mean": 0.0011200225344509818, "clip_ratio/low_min": 0.0002103312972394633, "clip_ratio/region_mean": 0.0022001242541591637, "epoch": 0.11013305800068833, "grad_norm": 0.11787573248147964, "learning_rate": 1e-06, "loss": 0.0462, "step": 1180 }, { "clip_ratio/high_max": 0.002421198856609408, "clip_ratio/high_mean": 0.0008848877150740009, "clip_ratio/low_mean": 0.001016052139675594, "clip_ratio/low_min": 3.873339119309094e-05, "clip_ratio/region_mean": 0.0019009398311027326, "epoch": 0.11022639110068891, "grad_norm": 0.10136258602142334, "learning_rate": 1e-06, "loss": 0.0275, "step": 1181 }, { "clip_ratio/high_max": 0.002572277211584151, "clip_ratio/high_mean": 0.0011720775219146162, "clip_ratio/low_mean": 0.0008886789528332883, "clip_ratio/low_min": 6.724361264787149e-05, "clip_ratio/region_mean": 0.002060756491118809, "epoch": 0.1103197242006895, "grad_norm": 0.12229104340076447, "learning_rate": 1e-06, "loss": 0.0244, "step": 1182 }, { "clip_ratio/high_max": 0.0020963844071957283, "clip_ratio/high_mean": 0.0008298370248667197, "clip_ratio/low_mean": 0.0011408174177631736, "clip_ratio/low_min": 0.00014471824943029787, "clip_ratio/region_mean": 0.0019706544626387767, "epoch": 0.11041305730069008, "grad_norm": 0.13758879899978638, "learning_rate": 1e-06, "loss": 0.0794, "step": 1183 }, { "clip_ratio/high_max": 0.0026509791568969376, "clip_ratio/high_mean": 0.0011043364720535465, "clip_ratio/low_mean": 0.0008642825578135671, "clip_ratio/low_min": 8.106119821604807e-05, "clip_ratio/region_mean": 0.001968619086255785, "epoch": 0.11050639040069067, "grad_norm": 0.10574328899383545, "learning_rate": 1e-06, "loss": -0.0136, "step": 1184 }, { "clip_ratio/high_max": 0.0027124802581965923, "clip_ratio/high_mean": 0.0010716210999817122, "clip_ratio/low_mean": 0.0011267261397733819, "clip_ratio/low_min": 4.273506056051701e-05, "clip_ratio/region_mean": 0.002198347225203179, "epoch": 0.11059972350069125, "grad_norm": 0.12926967442035675, "learning_rate": 1e-06, "loss": 0.0123, "step": 1185 }, { "clip_ratio/high_max": 0.003039923161850311, "clip_ratio/high_mean": 0.0012348273485258687, "clip_ratio/low_mean": 0.0010891843958233949, "clip_ratio/low_min": 0.0001114938404498389, "clip_ratio/region_mean": 0.0023240117443492636, "epoch": 0.11069305660069183, "grad_norm": 0.12365727871656418, "learning_rate": 1e-06, "loss": 0.0058, "step": 1186 }, { "clip_ratio/high_max": 0.003038015987840481, "clip_ratio/high_mean": 0.0012616448439075612, "clip_ratio/low_mean": 0.0010106407644343562, "clip_ratio/low_min": 3.537572411005385e-05, "clip_ratio/region_mean": 0.0022722855646861717, "epoch": 0.11078638970069242, "grad_norm": 0.12358976900577545, "learning_rate": 1e-06, "loss": -0.0162, "step": 1187 }, { "clip_ratio/high_max": 0.0023848461678426247, "clip_ratio/high_mean": 0.000884216262420523, "clip_ratio/low_mean": 0.0011999135531368665, "clip_ratio/low_min": 0.00010298970664734952, "clip_ratio/region_mean": 0.0020841298101004213, "epoch": 0.110879722800693, "grad_norm": 0.11596240103244781, "learning_rate": 1e-06, "loss": 0.0658, "step": 1188 }, { "clip_ratio/high_max": 0.0022402134018193465, "clip_ratio/high_mean": 0.0009440933226869674, "clip_ratio/low_mean": 0.001158672104793368, "clip_ratio/low_min": 0.00011200694098079111, "clip_ratio/region_mean": 0.0021027653929195367, "epoch": 0.11097305590069358, "grad_norm": 0.12584152817726135, "learning_rate": 1e-06, "loss": 0.0446, "step": 1189 }, { "clip_ratio/high_max": 0.0025912981291185133, "clip_ratio/high_mean": 0.0010761734238258214, "clip_ratio/low_mean": 0.0011078929419454653, "clip_ratio/low_min": 0.0001212302940984955, "clip_ratio/region_mean": 0.002184066404879559, "epoch": 0.11106638900069417, "grad_norm": 0.10775712877511978, "learning_rate": 1e-06, "loss": 0.0154, "step": 1190 }, { "clip_ratio/high_max": 0.002431743203487713, "clip_ratio/high_mean": 0.0010769440195872448, "clip_ratio/low_mean": 0.00115745762741426, "clip_ratio/low_min": 0.00011558580354176229, "clip_ratio/region_mean": 0.0022344017270370387, "epoch": 0.11115972210069475, "grad_norm": 0.1186540350317955, "learning_rate": 1e-06, "loss": 0.0433, "step": 1191 }, { "clip_ratio/high_max": 0.0027180488250451162, "clip_ratio/high_mean": 0.0011467203294159845, "clip_ratio/low_mean": 0.0011393007735023275, "clip_ratio/low_min": 5.3872015996603295e-05, "clip_ratio/region_mean": 0.0022860210519866087, "epoch": 0.11125305520069532, "grad_norm": 0.12269347161054611, "learning_rate": 1e-06, "loss": 0.0189, "step": 1192 }, { "clip_ratio/high_max": 0.00250194763430045, "clip_ratio/high_mean": 0.0010861757618840784, "clip_ratio/low_mean": 0.0011375939429854043, "clip_ratio/low_min": 0.00011869199624925386, "clip_ratio/region_mean": 0.0022237696321099065, "epoch": 0.11134638830069592, "grad_norm": 0.10908220708370209, "learning_rate": 1e-06, "loss": 0.0148, "step": 1193 }, { "clip_ratio/high_max": 0.0028957383692613803, "clip_ratio/high_mean": 0.0011445467389421538, "clip_ratio/low_mean": 0.0011427075478422921, "clip_ratio/low_min": 4.933065974910278e-05, "clip_ratio/region_mean": 0.002287254268594552, "epoch": 0.1114397214006965, "grad_norm": 0.12823274731636047, "learning_rate": 1e-06, "loss": -0.0085, "step": 1194 }, { "clip_ratio/high_max": 0.002681483281776309, "clip_ratio/high_mean": 0.0011821445768873673, "clip_ratio/low_mean": 0.0011066065490012988, "clip_ratio/low_min": 5.7073930292972364e-05, "clip_ratio/region_mean": 0.0022887511222506873, "epoch": 0.11153305450069709, "grad_norm": 0.11433789134025574, "learning_rate": 1e-06, "loss": 0.035, "step": 1195 }, { "clip_ratio/high_max": 0.0023891531163826585, "clip_ratio/high_mean": 0.0010262292780680582, "clip_ratio/low_mean": 0.0010906782663369086, "clip_ratio/low_min": 7.337635179283097e-05, "clip_ratio/region_mean": 0.0021169075698708184, "epoch": 0.11162638760069767, "grad_norm": 0.11177698522806168, "learning_rate": 1e-06, "loss": 0.0186, "step": 1196 }, { "clip_ratio/high_max": 0.002394787035882473, "clip_ratio/high_mean": 0.0009900727891363204, "clip_ratio/low_mean": 0.0009499857387709199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019400585370021872, "epoch": 0.11171972070069824, "grad_norm": 0.10699784755706787, "learning_rate": 1e-06, "loss": 0.0081, "step": 1197 }, { "clip_ratio/high_max": 0.002460182695358526, "clip_ratio/high_mean": 0.0010587245360511588, "clip_ratio/low_mean": 0.0011826245608972386, "clip_ratio/low_min": 0.0001090927926270524, "clip_ratio/region_mean": 0.0022413490805774927, "epoch": 0.11181305380069884, "grad_norm": 0.1167849525809288, "learning_rate": 1e-06, "loss": 0.066, "step": 1198 }, { "clip_ratio/high_max": 0.002818029279296752, "clip_ratio/high_mean": 0.001097101248888066, "clip_ratio/low_mean": 0.0010746394636953482, "clip_ratio/low_min": 8.465612881991547e-05, "clip_ratio/region_mean": 0.0021717406416428275, "epoch": 0.11190638690069941, "grad_norm": 0.11420687288045883, "learning_rate": 1e-06, "loss": 0.0098, "step": 1199 }, { "clip_ratio/high_max": 0.002425856677291449, "clip_ratio/high_mean": 0.0009930106371029979, "clip_ratio/low_mean": 0.00104721345451253, "clip_ratio/low_min": 7.833142899471568e-05, "clip_ratio/region_mean": 0.002040224091615528, "epoch": 0.11199972000069999, "grad_norm": 0.112687848508358, "learning_rate": 1e-06, "loss": -0.004, "step": 1200 }, { "clip_ratio/high_max": 0.0026552399067441, "clip_ratio/high_mean": 0.001172763244539965, "clip_ratio/low_mean": 0.0009646833277656697, "clip_ratio/low_min": 6.048934119462501e-05, "clip_ratio/region_mean": 0.00213744658685755, "epoch": 0.11209305310070058, "grad_norm": 0.10748311877250671, "learning_rate": 1e-06, "loss": -0.0325, "step": 1201 }, { "clip_ratio/high_max": 0.0028617993521038443, "clip_ratio/high_mean": 0.0011784493435698096, "clip_ratio/low_mean": 0.0010744831679403433, "clip_ratio/low_min": 4.813675332115963e-05, "clip_ratio/region_mean": 0.002252932477858849, "epoch": 0.11218638620070116, "grad_norm": 0.11108695715665817, "learning_rate": 1e-06, "loss": 0.0023, "step": 1202 }, { "clip_ratio/high_max": 0.0023855465260567144, "clip_ratio/high_mean": 0.0010530286399443867, "clip_ratio/low_mean": 0.001124067030104925, "clip_ratio/low_min": 9.296734515373828e-05, "clip_ratio/region_mean": 0.002177095666411333, "epoch": 0.11227971930070174, "grad_norm": 0.12072212994098663, "learning_rate": 1e-06, "loss": 0.0305, "step": 1203 }, { "clip_ratio/high_max": 0.002847674175427528, "clip_ratio/high_mean": 0.0011625915067270398, "clip_ratio/low_mean": 0.0011322244063194375, "clip_ratio/low_min": 5.276225601846818e-05, "clip_ratio/region_mean": 0.002294815865752753, "epoch": 0.11237305240070233, "grad_norm": 0.13078653812408447, "learning_rate": 1e-06, "loss": 0.025, "step": 1204 }, { "clip_ratio/high_max": 0.0024592020781710744, "clip_ratio/high_mean": 0.0009955171444744337, "clip_ratio/low_mean": 0.001339192567684222, "clip_ratio/low_min": 0.00012011306171189062, "clip_ratio/region_mean": 0.002334709723072592, "epoch": 0.11246638550070291, "grad_norm": 0.1268586814403534, "learning_rate": 1e-06, "loss": 0.0436, "step": 1205 }, { "clip_ratio/high_max": 0.002364511958148796, "clip_ratio/high_mean": 0.00107653147460951, "clip_ratio/low_mean": 0.0011547120611794526, "clip_ratio/low_min": 0.0001686973173491424, "clip_ratio/region_mean": 0.0022312435394269414, "epoch": 0.1125597186007035, "grad_norm": 0.12013255059719086, "learning_rate": 1e-06, "loss": 0.0272, "step": 1206 }, { "clip_ratio/high_max": 0.002810508587572258, "clip_ratio/high_mean": 0.0011008361052518012, "clip_ratio/low_mean": 0.0012866086490248563, "clip_ratio/low_min": 4.414697286847513e-05, "clip_ratio/region_mean": 0.002387444765190594, "epoch": 0.11265305170070408, "grad_norm": 0.1061701849102974, "learning_rate": 1e-06, "loss": 0.0126, "step": 1207 }, { "clip_ratio/high_max": 0.002492391831765417, "clip_ratio/high_mean": 0.001137933533755131, "clip_ratio/low_mean": 0.0010346618673793273, "clip_ratio/low_min": 0.00016255233185802354, "clip_ratio/region_mean": 0.0021725954284192994, "epoch": 0.11274638480070466, "grad_norm": 0.1131354421377182, "learning_rate": 1e-06, "loss": -0.0271, "step": 1208 }, { "clip_ratio/high_max": 0.0026482728535484057, "clip_ratio/high_mean": 0.0011294639216430369, "clip_ratio/low_mean": 0.0011897526728716912, "clip_ratio/low_min": 7.585861749248579e-05, "clip_ratio/region_mean": 0.0023192165317595936, "epoch": 0.11283971790070525, "grad_norm": 0.12359905987977982, "learning_rate": 1e-06, "loss": 0.0121, "step": 1209 }, { "clip_ratio/high_max": 0.0024440104971290566, "clip_ratio/high_mean": 0.0011161999900650699, "clip_ratio/low_mean": 0.0012108633454772644, "clip_ratio/low_min": 0.00013863556250726106, "clip_ratio/region_mean": 0.002327063339180313, "epoch": 0.11293305100070583, "grad_norm": 0.11111761629581451, "learning_rate": 1e-06, "loss": 0.0163, "step": 1210 }, { "clip_ratio/high_max": 0.0029320836038095877, "clip_ratio/high_mean": 0.001194272601424018, "clip_ratio/low_mean": 0.0012606735308509087, "clip_ratio/low_min": 6.907318675075658e-05, "clip_ratio/region_mean": 0.0024549460722482763, "epoch": 0.11302638410070641, "grad_norm": 0.11577598750591278, "learning_rate": 1e-06, "loss": 0.0136, "step": 1211 }, { "clip_ratio/high_max": 0.002413258989690803, "clip_ratio/high_mean": 0.0011087110833614133, "clip_ratio/low_mean": 0.0012625925155589357, "clip_ratio/low_min": 0.000203817498004355, "clip_ratio/region_mean": 0.002371303620748222, "epoch": 0.113119717200707, "grad_norm": 0.11881434172391891, "learning_rate": 1e-06, "loss": 0.0209, "step": 1212 }, { "clip_ratio/high_max": 0.002713003232202027, "clip_ratio/high_mean": 0.001119829361414304, "clip_ratio/low_mean": 0.0011212391655135434, "clip_ratio/low_min": 2.8261732950340956e-05, "clip_ratio/region_mean": 0.0022410686215152964, "epoch": 0.11321305030070758, "grad_norm": 0.10956117510795593, "learning_rate": 1e-06, "loss": 0.0131, "step": 1213 }, { "clip_ratio/high_max": 0.002357284181925934, "clip_ratio/high_mean": 0.0010268216719850898, "clip_ratio/low_mean": 0.0013034044604864903, "clip_ratio/low_min": 0.00017426836348022334, "clip_ratio/region_mean": 0.0023302261470234953, "epoch": 0.11330638340070817, "grad_norm": 0.1119605153799057, "learning_rate": 1e-06, "loss": 0.0272, "step": 1214 }, { "clip_ratio/high_max": 0.0025026722796610557, "clip_ratio/high_mean": 0.001099099430575734, "clip_ratio/low_mean": 0.0009610571796656586, "clip_ratio/low_min": 2.4329051484528463e-05, "clip_ratio/region_mean": 0.0020601566357072443, "epoch": 0.11339971650070875, "grad_norm": 0.11132616549730301, "learning_rate": 1e-06, "loss": 0.0187, "step": 1215 }, { "clip_ratio/high_max": 0.002510435779186082, "clip_ratio/high_mean": 0.0010765408451334224, "clip_ratio/low_mean": 0.0011286608241789509, "clip_ratio/low_min": 6.877381201775279e-05, "clip_ratio/region_mean": 0.0022052016720408574, "epoch": 0.11349304960070933, "grad_norm": 0.11213754117488861, "learning_rate": 1e-06, "loss": 0.0254, "step": 1216 }, { "clip_ratio/high_max": 0.002374637821048964, "clip_ratio/high_mean": 0.0011195650931767887, "clip_ratio/low_mean": 0.0010895597406488378, "clip_ratio/low_min": 0.00017216658852703404, "clip_ratio/region_mean": 0.0022091248756623827, "epoch": 0.11358638270070992, "grad_norm": 0.11881782114505768, "learning_rate": 1e-06, "loss": -0.0014, "step": 1217 }, { "clip_ratio/high_max": 0.002377979391894769, "clip_ratio/high_mean": 0.0010857104280148633, "clip_ratio/low_mean": 0.0012226139442645945, "clip_ratio/low_min": 0.00013999407201481517, "clip_ratio/region_mean": 0.0023083243722794577, "epoch": 0.1136797158007105, "grad_norm": 0.11346249282360077, "learning_rate": 1e-06, "loss": 0.0499, "step": 1218 }, { "clip_ratio/high_max": 0.0024042427467065863, "clip_ratio/high_mean": 0.0010952568009088282, "clip_ratio/low_mean": 0.0013081053402856924, "clip_ratio/low_min": 0.00011137879482703283, "clip_ratio/region_mean": 0.002403362130280584, "epoch": 0.11377304890071108, "grad_norm": 0.11177940666675568, "learning_rate": 1e-06, "loss": 0.0116, "step": 1219 }, { "clip_ratio/high_max": 0.0025204455014318228, "clip_ratio/high_mean": 0.0010780549309856724, "clip_ratio/low_mean": 0.001270218737772666, "clip_ratio/low_min": 0.00018541646204539575, "clip_ratio/region_mean": 0.0023482737378799357, "epoch": 0.11386638200071167, "grad_norm": 0.11209125071763992, "learning_rate": 1e-06, "loss": 0.0026, "step": 1220 }, { "clip_ratio/high_max": 0.002907198140746914, "clip_ratio/high_mean": 0.001148980756624951, "clip_ratio/low_mean": 0.0009508351067779586, "clip_ratio/low_min": 0.00011329497647238895, "clip_ratio/region_mean": 0.0020998158724978566, "epoch": 0.11395971510071225, "grad_norm": 0.12342007458209991, "learning_rate": 1e-06, "loss": 0.009, "step": 1221 }, { "clip_ratio/high_max": 0.0024654370427015238, "clip_ratio/high_mean": 0.0010780396587506402, "clip_ratio/low_mean": 0.001451374962925911, "clip_ratio/low_min": 7.036116858216701e-05, "clip_ratio/region_mean": 0.0025294147053500637, "epoch": 0.11405304820071283, "grad_norm": 0.10980355739593506, "learning_rate": 1e-06, "loss": 0.0583, "step": 1222 }, { "clip_ratio/high_max": 0.0027817566369776614, "clip_ratio/high_mean": 0.001279632906516781, "clip_ratio/low_mean": 0.0011250143852521433, "clip_ratio/low_min": 5.3364360610430595e-05, "clip_ratio/region_mean": 0.0024046473554335535, "epoch": 0.11414638130071342, "grad_norm": 0.11107853055000305, "learning_rate": 1e-06, "loss": 0.0267, "step": 1223 }, { "clip_ratio/high_max": 0.002143105535651557, "clip_ratio/high_mean": 0.0009024186274473323, "clip_ratio/low_mean": 0.0011317859098198824, "clip_ratio/low_min": 0.00016458829577459255, "clip_ratio/region_mean": 0.0020342045318102464, "epoch": 0.114239714400714, "grad_norm": 0.11549653857946396, "learning_rate": 1e-06, "loss": 0.0252, "step": 1224 }, { "clip_ratio/high_max": 0.002626558351039421, "clip_ratio/high_mean": 0.001059851785612409, "clip_ratio/low_mean": 0.0010941274922515731, "clip_ratio/low_min": 5.223984953772742e-05, "clip_ratio/region_mean": 0.002153979272407014, "epoch": 0.11433304750071459, "grad_norm": 0.12288986146450043, "learning_rate": 1e-06, "loss": 0.0386, "step": 1225 }, { "clip_ratio/high_max": 0.003055918372410815, "clip_ratio/high_mean": 0.001187608351756353, "clip_ratio/low_mean": 0.0013058970871497877, "clip_ratio/low_min": 0.00016501074242114555, "clip_ratio/region_mean": 0.0024935054389061406, "epoch": 0.11442638060071517, "grad_norm": 0.12508055567741394, "learning_rate": 1e-06, "loss": 0.0487, "step": 1226 }, { "clip_ratio/high_max": 0.002487231518898625, "clip_ratio/high_mean": 0.0010719842903199606, "clip_ratio/low_mean": 0.0010408343578092172, "clip_ratio/low_min": 1.759572114679031e-05, "clip_ratio/region_mean": 0.002112818619934842, "epoch": 0.11451971370071574, "grad_norm": 0.09493252635002136, "learning_rate": 1e-06, "loss": 0.0066, "step": 1227 }, { "clip_ratio/high_max": 0.002374014860833995, "clip_ratio/high_mean": 0.0010484732974873623, "clip_ratio/low_mean": 0.0012189659973955713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002267439318529796, "epoch": 0.11461304680071634, "grad_norm": 0.10654571652412415, "learning_rate": 1e-06, "loss": 0.0278, "step": 1228 }, { "clip_ratio/high_max": 0.002509802689019125, "clip_ratio/high_mean": 0.001009592013360816, "clip_ratio/low_mean": 0.0011653611109068152, "clip_ratio/low_min": 6.169331390992738e-05, "clip_ratio/region_mean": 0.002174953064240981, "epoch": 0.11470637990071691, "grad_norm": 0.10817123204469681, "learning_rate": 1e-06, "loss": 0.087, "step": 1229 }, { "clip_ratio/high_max": 0.0023047270078677684, "clip_ratio/high_mean": 0.0010717355617089197, "clip_ratio/low_mean": 0.0012827192185795866, "clip_ratio/low_min": 8.209105726564303e-05, "clip_ratio/region_mean": 0.0023544548166682944, "epoch": 0.1147997130007175, "grad_norm": 0.13143272697925568, "learning_rate": 1e-06, "loss": 0.0102, "step": 1230 }, { "clip_ratio/high_max": 0.002837826672475785, "clip_ratio/high_mean": 0.0011027122345694806, "clip_ratio/low_mean": 0.0010226727808912983, "clip_ratio/low_min": 0.00017012880198308267, "clip_ratio/region_mean": 0.0021253850136417896, "epoch": 0.11489304610071809, "grad_norm": 0.1140839084982872, "learning_rate": 1e-06, "loss": 0.0186, "step": 1231 }, { "clip_ratio/high_max": 0.0028421363240340725, "clip_ratio/high_mean": 0.0013222473535279278, "clip_ratio/low_mean": 0.0011667769504128955, "clip_ratio/low_min": 6.35658389001037e-05, "clip_ratio/region_mean": 0.0024890242639230564, "epoch": 0.11498637920071866, "grad_norm": 0.12433359026908875, "learning_rate": 1e-06, "loss": -0.0019, "step": 1232 }, { "clip_ratio/high_max": 0.002858025320165325, "clip_ratio/high_mean": 0.0011508914467412978, "clip_ratio/low_mean": 0.001295942667638883, "clip_ratio/low_min": 0.0001240560495716636, "clip_ratio/region_mean": 0.002446834070724435, "epoch": 0.11507971230071924, "grad_norm": 0.11896398663520813, "learning_rate": 1e-06, "loss": 0.0288, "step": 1233 }, { "clip_ratio/high_max": 0.0029834737651981413, "clip_ratio/high_mean": 0.0013063383048574906, "clip_ratio/low_mean": 0.0011824090233858442, "clip_ratio/low_min": 7.498757895518793e-05, "clip_ratio/region_mean": 0.002488747355528176, "epoch": 0.11517304540071983, "grad_norm": 0.1223648339509964, "learning_rate": 1e-06, "loss": -0.0056, "step": 1234 }, { "clip_ratio/high_max": 0.0026859227582463063, "clip_ratio/high_mean": 0.0012197845499031246, "clip_ratio/low_mean": 0.0012004819072899409, "clip_ratio/low_min": 2.5783829187275842e-05, "clip_ratio/region_mean": 0.0024202664571930654, "epoch": 0.11526637850072041, "grad_norm": 0.11144250631332397, "learning_rate": 1e-06, "loss": -0.001, "step": 1235 }, { "clip_ratio/high_max": 0.0024210904448409565, "clip_ratio/high_mean": 0.0009808355289351312, "clip_ratio/low_mean": 0.0012403303444443736, "clip_ratio/low_min": 0.00012106583926652092, "clip_ratio/region_mean": 0.002221165836090222, "epoch": 0.115359711600721, "grad_norm": 0.11551078408956528, "learning_rate": 1e-06, "loss": 0.0356, "step": 1236 }, { "clip_ratio/high_max": 0.002920305007137358, "clip_ratio/high_mean": 0.001245065202965634, "clip_ratio/low_mean": 0.0011656327660602983, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024106979253701866, "epoch": 0.11545304470072158, "grad_norm": 0.1116659939289093, "learning_rate": 1e-06, "loss": -0.0062, "step": 1237 }, { "clip_ratio/high_max": 0.002577817314886488, "clip_ratio/high_mean": 0.0010785205613501603, "clip_ratio/low_mean": 0.0011799832682299893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002258503831399139, "epoch": 0.11554637780072216, "grad_norm": 0.11359122395515442, "learning_rate": 1e-06, "loss": 0.0212, "step": 1238 }, { "clip_ratio/high_max": 0.0031608296631020494, "clip_ratio/high_mean": 0.0011929350766877178, "clip_ratio/low_mean": 0.001161477073765127, "clip_ratio/low_min": 0.00010525904872338288, "clip_ratio/region_mean": 0.0023544121795566753, "epoch": 0.11563971090072275, "grad_norm": 0.1082422137260437, "learning_rate": 1e-06, "loss": 0.0178, "step": 1239 }, { "clip_ratio/high_max": 0.00226152062532492, "clip_ratio/high_mean": 0.001028628483254579, "clip_ratio/low_mean": 0.0012177384523965884, "clip_ratio/low_min": 0.00010344469046685845, "clip_ratio/region_mean": 0.0022463669192802627, "epoch": 0.11573304400072333, "grad_norm": 0.1235145777463913, "learning_rate": 1e-06, "loss": 0.0399, "step": 1240 }, { "clip_ratio/high_max": 0.0025888833552016877, "clip_ratio/high_mean": 0.0011082125311077107, "clip_ratio/low_mean": 0.0013110085601510946, "clip_ratio/low_min": 0.00019853362937283237, "clip_ratio/region_mean": 0.0024192210330511443, "epoch": 0.11582637710072391, "grad_norm": 0.1176135390996933, "learning_rate": 1e-06, "loss": 0.0554, "step": 1241 }, { "clip_ratio/high_max": 0.0025159292927128263, "clip_ratio/high_mean": 0.0011307577733532526, "clip_ratio/low_mean": 0.0011334201954014134, "clip_ratio/low_min": 5.870101995242294e-05, "clip_ratio/region_mean": 0.002264177950564772, "epoch": 0.1159197102007245, "grad_norm": 0.21855543553829193, "learning_rate": 1e-06, "loss": 0.0366, "step": 1242 }, { "clip_ratio/high_max": 0.0027163535414729267, "clip_ratio/high_mean": 0.0011778916195908096, "clip_ratio/low_mean": 0.0011235247147851624, "clip_ratio/low_min": 5.8103820265387185e-05, "clip_ratio/region_mean": 0.002301416352565866, "epoch": 0.11601304330072508, "grad_norm": 0.11948242783546448, "learning_rate": 1e-06, "loss": 0.0043, "step": 1243 }, { "clip_ratio/high_max": 0.0030192918711691163, "clip_ratio/high_mean": 0.0012100219064450357, "clip_ratio/low_mean": 0.0012370348813419696, "clip_ratio/low_min": 0.00017882119936984964, "clip_ratio/region_mean": 0.0024470567805110477, "epoch": 0.11610637640072566, "grad_norm": 0.11817143857479095, "learning_rate": 1e-06, "loss": 0.0076, "step": 1244 }, { "clip_ratio/high_max": 0.002700486373214517, "clip_ratio/high_mean": 0.0010983870961354114, "clip_ratio/low_mean": 0.0012288219295442104, "clip_ratio/low_min": 8.856002023094334e-05, "clip_ratio/region_mean": 0.0023272090547834523, "epoch": 0.11619970950072625, "grad_norm": 0.13108079135417938, "learning_rate": 1e-06, "loss": -0.0057, "step": 1245 }, { "clip_ratio/high_max": 0.002508375415345654, "clip_ratio/high_mean": 0.001042072781274328, "clip_ratio/low_mean": 0.001193254574900493, "clip_ratio/low_min": 0.00010190653847530484, "clip_ratio/region_mean": 0.002235327374364715, "epoch": 0.11629304260072683, "grad_norm": 0.1131974533200264, "learning_rate": 1e-06, "loss": 0.0278, "step": 1246 }, { "clip_ratio/high_max": 0.002657712480868213, "clip_ratio/high_mean": 0.0012290649574424606, "clip_ratio/low_mean": 0.0013168034820409957, "clip_ratio/low_min": 0.0001341599036095431, "clip_ratio/region_mean": 0.002545868410379626, "epoch": 0.11638637570072742, "grad_norm": 0.10812344402074814, "learning_rate": 1e-06, "loss": 0.0095, "step": 1247 }, { "clip_ratio/high_max": 0.002726197002630215, "clip_ratio/high_mean": 0.0011453978841018397, "clip_ratio/low_mean": 0.0014631268313678447, "clip_ratio/low_min": 4.452624125406146e-05, "clip_ratio/region_mean": 0.0026085248246090487, "epoch": 0.116479708800728, "grad_norm": 0.1212625801563263, "learning_rate": 1e-06, "loss": 0.04, "step": 1248 }, { "clip_ratio/high_max": 0.003043636112124659, "clip_ratio/high_mean": 0.0011938805328099988, "clip_ratio/low_mean": 0.0014511906229017768, "clip_ratio/low_min": 0.00010345144255552441, "clip_ratio/region_mean": 0.0026450710865901783, "epoch": 0.11657304190072858, "grad_norm": 0.11640874296426773, "learning_rate": 1e-06, "loss": 0.0203, "step": 1249 }, { "clip_ratio/high_max": 0.002968999164295383, "clip_ratio/high_mean": 0.0013741601069341414, "clip_ratio/low_mean": 0.0013523021807486657, "clip_ratio/low_min": 0.0001274940004805103, "clip_ratio/region_mean": 0.0027264622185612097, "epoch": 0.11666637500072917, "grad_norm": 0.11351455748081207, "learning_rate": 1e-06, "loss": 0.0063, "step": 1250 }, { "clip_ratio/high_max": 0.0026110495819011703, "clip_ratio/high_mean": 0.0010721962080424419, "clip_ratio/low_mean": 0.0011775215289162588, "clip_ratio/low_min": 8.510787483828608e-05, "clip_ratio/region_mean": 0.0022497176832985133, "epoch": 0.11675970810072975, "grad_norm": 0.1033560186624527, "learning_rate": 1e-06, "loss": 0.0082, "step": 1251 }, { "clip_ratio/high_max": 0.0027876603744516615, "clip_ratio/high_mean": 0.0011413539505156223, "clip_ratio/low_mean": 0.0013970071740914136, "clip_ratio/low_min": 9.984825373976491e-05, "clip_ratio/region_mean": 0.002538361106417142, "epoch": 0.11685304120073033, "grad_norm": 0.1232597753405571, "learning_rate": 1e-06, "loss": 0.0044, "step": 1252 }, { "clip_ratio/high_max": 0.0024598976015113294, "clip_ratio/high_mean": 0.0010205740181845613, "clip_ratio/low_mean": 0.001317896956607001, "clip_ratio/low_min": 0.00016895811677386519, "clip_ratio/region_mean": 0.0023384709857054986, "epoch": 0.11694637430073092, "grad_norm": 0.1106187179684639, "learning_rate": 1e-06, "loss": 0.0595, "step": 1253 }, { "clip_ratio/high_max": 0.0024557662254665047, "clip_ratio/high_mean": 0.0011004724274243927, "clip_ratio/low_mean": 0.0012089501205991837, "clip_ratio/low_min": 0.00011404952419979963, "clip_ratio/region_mean": 0.0023094226053217426, "epoch": 0.1170397074007315, "grad_norm": 0.11428594589233398, "learning_rate": 1e-06, "loss": 0.0346, "step": 1254 }, { "clip_ratio/high_max": 0.0028476766528910957, "clip_ratio/high_mean": 0.0011265062785241753, "clip_ratio/low_mean": 0.0014320096834126161, "clip_ratio/low_min": 0.00011900360959771206, "clip_ratio/region_mean": 0.0025585159310139716, "epoch": 0.11713304050073207, "grad_norm": 0.11334823817014694, "learning_rate": 1e-06, "loss": 0.0491, "step": 1255 }, { "clip_ratio/high_max": 0.002735421781835612, "clip_ratio/high_mean": 0.0011139361240566359, "clip_ratio/low_mean": 0.0012695090517809149, "clip_ratio/low_min": 0.00012365471775410697, "clip_ratio/region_mean": 0.002383445178566035, "epoch": 0.11722637360073267, "grad_norm": 0.10950180143117905, "learning_rate": 1e-06, "loss": 0.0195, "step": 1256 }, { "clip_ratio/high_max": 0.0029859409405617043, "clip_ratio/high_mean": 0.0012715082957583945, "clip_ratio/low_mean": 0.0011775784496421693, "clip_ratio/low_min": 2.7861287890118547e-05, "clip_ratio/region_mean": 0.0024490867217537016, "epoch": 0.11731970670073325, "grad_norm": 0.13847064971923828, "learning_rate": 1e-06, "loss": -0.0137, "step": 1257 }, { "clip_ratio/high_max": 0.002971746445837198, "clip_ratio/high_mean": 0.0012477363616199, "clip_ratio/low_mean": 0.0012153943607700057, "clip_ratio/low_min": 0.00013913389011577237, "clip_ratio/region_mean": 0.002463130746036768, "epoch": 0.11741303980073384, "grad_norm": 0.25400254130363464, "learning_rate": 1e-06, "loss": 0.0225, "step": 1258 }, { "clip_ratio/high_max": 0.0024867820902727544, "clip_ratio/high_mean": 0.0011244415218243375, "clip_ratio/low_mean": 0.0012977943297300953, "clip_ratio/low_min": 5.330502062861342e-05, "clip_ratio/region_mean": 0.0024222358551924117, "epoch": 0.11750637290073442, "grad_norm": 0.10334615409374237, "learning_rate": 1e-06, "loss": 0.0368, "step": 1259 }, { "clip_ratio/high_max": 0.002701937635720242, "clip_ratio/high_mean": 0.0011171300920977956, "clip_ratio/low_mean": 0.0014942523157515097, "clip_ratio/low_min": 0.00023630415125808213, "clip_ratio/region_mean": 0.0026113824278581887, "epoch": 0.117599706000735, "grad_norm": 0.11032275855541229, "learning_rate": 1e-06, "loss": 0.0342, "step": 1260 }, { "clip_ratio/high_max": 0.0025608039650251158, "clip_ratio/high_mean": 0.0011437597167969216, "clip_ratio/low_mean": 0.0013113924251229037, "clip_ratio/low_min": 0.00012246981896169018, "clip_ratio/region_mean": 0.0024551521419198252, "epoch": 0.11769303910073559, "grad_norm": 0.11309037357568741, "learning_rate": 1e-06, "loss": 0.0111, "step": 1261 }, { "clip_ratio/high_max": 0.002589380521385465, "clip_ratio/high_mean": 0.0011820303407148458, "clip_ratio/low_mean": 0.0013472383361659013, "clip_ratio/low_min": 6.480483898485545e-05, "clip_ratio/region_mean": 0.0025292686113971286, "epoch": 0.11778637220073616, "grad_norm": 0.12196750193834305, "learning_rate": 1e-06, "loss": 0.043, "step": 1262 }, { "clip_ratio/high_max": 0.0033571323147043586, "clip_ratio/high_mean": 0.001306992964600795, "clip_ratio/low_mean": 0.0012608893266587984, "clip_ratio/low_min": 1.7443482647649944e-05, "clip_ratio/region_mean": 0.0025678823367343284, "epoch": 0.11787970530073674, "grad_norm": 0.11040571331977844, "learning_rate": 1e-06, "loss": 0.0159, "step": 1263 }, { "clip_ratio/high_max": 0.002608313436212484, "clip_ratio/high_mean": 0.0010171768517466262, "clip_ratio/low_mean": 0.0012925037626700941, "clip_ratio/low_min": 5.325947859091684e-05, "clip_ratio/region_mean": 0.0023096806326066144, "epoch": 0.11797303840073733, "grad_norm": 0.11099119484424591, "learning_rate": 1e-06, "loss": 0.0894, "step": 1264 }, { "clip_ratio/high_max": 0.003116911648248788, "clip_ratio/high_mean": 0.0011670192616293207, "clip_ratio/low_mean": 0.0012749529087159317, "clip_ratio/low_min": 6.943041626072954e-05, "clip_ratio/region_mean": 0.0024419721885351464, "epoch": 0.11806637150073791, "grad_norm": 0.11448632925748825, "learning_rate": 1e-06, "loss": 0.0446, "step": 1265 }, { "clip_ratio/high_max": 0.002724581594520714, "clip_ratio/high_mean": 0.0011624906546785496, "clip_ratio/low_mean": 0.001213326169818174, "clip_ratio/low_min": 1.5217920918075833e-05, "clip_ratio/region_mean": 0.0023758168463245966, "epoch": 0.1181597046007385, "grad_norm": 0.11675304919481277, "learning_rate": 1e-06, "loss": 0.0414, "step": 1266 }, { "clip_ratio/high_max": 0.0031477264710702, "clip_ratio/high_mean": 0.001277949808354606, "clip_ratio/low_mean": 0.0013949320564279333, "clip_ratio/low_min": 5.5809789955674205e-05, "clip_ratio/region_mean": 0.0026728818338597193, "epoch": 0.11825303770073908, "grad_norm": 0.11598726361989975, "learning_rate": 1e-06, "loss": 0.005, "step": 1267 }, { "clip_ratio/high_max": 0.0032902609746088274, "clip_ratio/high_mean": 0.0013908240071032196, "clip_ratio/low_mean": 0.0012551409927255008, "clip_ratio/low_min": 4.173062370682601e-05, "clip_ratio/region_mean": 0.0026459649816388264, "epoch": 0.11834637080073966, "grad_norm": 0.1187095046043396, "learning_rate": 1e-06, "loss": -0.01, "step": 1268 }, { "clip_ratio/high_max": 0.0027369569215807132, "clip_ratio/high_mean": 0.0011593812832870753, "clip_ratio/low_mean": 0.0013337703494471498, "clip_ratio/low_min": 0.00010947536611638498, "clip_ratio/region_mean": 0.0024931516309152357, "epoch": 0.11843970390074025, "grad_norm": 0.1019117459654808, "learning_rate": 1e-06, "loss": 0.025, "step": 1269 }, { "clip_ratio/high_max": 0.003011727865668945, "clip_ratio/high_mean": 0.001362494640488876, "clip_ratio/low_mean": 0.0012878921734227333, "clip_ratio/low_min": 7.200618165370543e-05, "clip_ratio/region_mean": 0.0026503868066356517, "epoch": 0.11853303700074083, "grad_norm": 0.11724326759576797, "learning_rate": 1e-06, "loss": 0.0091, "step": 1270 }, { "clip_ratio/high_max": 0.0026873566457652487, "clip_ratio/high_mean": 0.0011771650388254784, "clip_ratio/low_mean": 0.0012928696905873949, "clip_ratio/low_min": 9.021845107781701e-05, "clip_ratio/region_mean": 0.0024700347130419686, "epoch": 0.11862637010074141, "grad_norm": 0.1181105300784111, "learning_rate": 1e-06, "loss": 0.0419, "step": 1271 }, { "clip_ratio/high_max": 0.002751998181338422, "clip_ratio/high_mean": 0.001230129593750462, "clip_ratio/low_mean": 0.0011646008606476244, "clip_ratio/low_min": 6.25976244919002e-05, "clip_ratio/region_mean": 0.0023947304653120227, "epoch": 0.118719703200742, "grad_norm": 0.15212400257587433, "learning_rate": 1e-06, "loss": 0.0116, "step": 1272 }, { "clip_ratio/high_max": 0.00277440019272035, "clip_ratio/high_mean": 0.001213189087138744, "clip_ratio/low_mean": 0.0013494054073817097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025625945199863054, "epoch": 0.11881303630074258, "grad_norm": 0.12205009162425995, "learning_rate": 1e-06, "loss": 0.0167, "step": 1273 }, { "clip_ratio/high_max": 0.0026543722706264816, "clip_ratio/high_mean": 0.0010807045255205594, "clip_ratio/low_mean": 0.0011866507338709198, "clip_ratio/low_min": 5.900519681745209e-05, "clip_ratio/region_mean": 0.002267355281219352, "epoch": 0.11890636940074316, "grad_norm": 0.11266744136810303, "learning_rate": 1e-06, "loss": 0.0367, "step": 1274 }, { "clip_ratio/high_max": 0.0026573524592095055, "clip_ratio/high_mean": 0.0012090996824554168, "clip_ratio/low_mean": 0.0011447728320490569, "clip_ratio/low_min": 1.5903307939879596e-05, "clip_ratio/region_mean": 0.0023538724781246856, "epoch": 0.11899970250074375, "grad_norm": 0.11570925265550613, "learning_rate": 1e-06, "loss": 0.0172, "step": 1275 }, { "clip_ratio/high_max": 0.002645457032485865, "clip_ratio/high_mean": 0.0011059771932195872, "clip_ratio/low_mean": 0.0014449966365646105, "clip_ratio/low_min": 9.428643625142286e-05, "clip_ratio/region_mean": 0.0025509738188702613, "epoch": 0.11909303560074433, "grad_norm": 0.11842282116413116, "learning_rate": 1e-06, "loss": 0.0711, "step": 1276 }, { "clip_ratio/high_max": 0.00260804715071572, "clip_ratio/high_mean": 0.0012167907734692562, "clip_ratio/low_mean": 0.0014779315315536223, "clip_ratio/low_min": 0.0003104298266407568, "clip_ratio/region_mean": 0.0026947223668685183, "epoch": 0.11918636870074492, "grad_norm": 0.11389115452766418, "learning_rate": 1e-06, "loss": 0.0603, "step": 1277 }, { "clip_ratio/high_max": 0.003054427172173746, "clip_ratio/high_mean": 0.0012806193590222392, "clip_ratio/low_mean": 0.0013454707659548149, "clip_ratio/low_min": 0.00016149640941875987, "clip_ratio/region_mean": 0.0026260901577188633, "epoch": 0.1192797018007455, "grad_norm": 0.11626116186380386, "learning_rate": 1e-06, "loss": 0.0025, "step": 1278 }, { "clip_ratio/high_max": 0.002863746318325866, "clip_ratio/high_mean": 0.0013735457505390514, "clip_ratio/low_mean": 0.001146861814049771, "clip_ratio/low_min": 0.0001406564133503707, "clip_ratio/region_mean": 0.002520407513657119, "epoch": 0.11937303490074608, "grad_norm": 0.11570712178945541, "learning_rate": 1e-06, "loss": -0.042, "step": 1279 }, { "clip_ratio/high_max": 0.0033707413786032703, "clip_ratio/high_mean": 0.0013473365615936927, "clip_ratio/low_mean": 0.0014435419070650823, "clip_ratio/low_min": 0.0001292692049901234, "clip_ratio/region_mean": 0.0027908784395549446, "epoch": 0.11946636800074667, "grad_norm": 0.11858441680669785, "learning_rate": 1e-06, "loss": 0.0331, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010393415178571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 610.109375, "completions/mean_terminated_length": 573.4985961914062, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.11955970110074725, "grad_norm": 0.12895292043685913, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 890648536.0, "reward": 0.6309814453125, "reward_std": 0.17484650015830994, "rewards/simpleverify_reward/mean": 0.6309814453125, "rewards/simpleverify_reward/std": 0.4825412333011627, "step": 1281 }, { "clip_ratio/high_max": 0.0021441451826831326, "clip_ratio/high_mean": 0.0008176259252650198, "clip_ratio/low_mean": 0.00047146658016572474, "clip_ratio/low_min": 1.3306365872267634e-05, "clip_ratio/region_mean": 0.0012890925172541756, "epoch": 0.11965303420074783, "grad_norm": 0.10670824348926544, "learning_rate": 1e-06, "loss": 0.0576, "step": 1282 }, { "clip_ratio/high_max": 0.0020660920708905905, "clip_ratio/high_mean": 0.0008382567266380647, "clip_ratio/low_mean": 0.0005422829344752245, "clip_ratio/low_min": 9.95539994619321e-06, "clip_ratio/region_mean": 0.001380539673846215, "epoch": 0.11974636730074842, "grad_norm": 0.12438701838254929, "learning_rate": 1e-06, "loss": 0.0313, "step": 1283 }, { "clip_ratio/high_max": 0.0019648446977953427, "clip_ratio/high_mean": 0.0008790952506387839, "clip_ratio/low_mean": 0.00047001554048620164, "clip_ratio/low_min": 1.4626725715061184e-05, "clip_ratio/region_mean": 0.001349110811133869, "epoch": 0.119839700400749, "grad_norm": 0.12401069700717926, "learning_rate": 1e-06, "loss": 0.0185, "step": 1284 }, { "clip_ratio/high_max": 0.0021877586259506643, "clip_ratio/high_mean": 0.0008226343379647005, "clip_ratio/low_mean": 0.000532995886601384, "clip_ratio/low_min": 1.253761274710996e-05, "clip_ratio/region_mean": 0.0013556302292272449, "epoch": 0.11993303350074958, "grad_norm": 0.1298038363456726, "learning_rate": 1e-06, "loss": 0.0493, "step": 1285 }, { "clip_ratio/high_max": 0.0021752143802586943, "clip_ratio/high_mean": 0.000922163588256808, "clip_ratio/low_mean": 0.000500899364851648, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014230629240046255, "epoch": 0.12002636660075017, "grad_norm": 0.11620011180639267, "learning_rate": 1e-06, "loss": 0.0086, "step": 1286 }, { "clip_ratio/high_max": 0.002193513824749971, "clip_ratio/high_mean": 0.0008912166158552282, "clip_ratio/low_mean": 0.0007291550282388926, "clip_ratio/low_min": 0.00010152971844945569, "clip_ratio/region_mean": 0.0016203715968003962, "epoch": 0.12011969970075075, "grad_norm": 0.12693192064762115, "learning_rate": 1e-06, "loss": 0.0181, "step": 1287 }, { "clip_ratio/high_max": 0.0026443946044309996, "clip_ratio/high_mean": 0.0009856653232418466, "clip_ratio/low_mean": 0.0007984113972270279, "clip_ratio/low_min": 9.48485912886099e-05, "clip_ratio/region_mean": 0.0017840767213783693, "epoch": 0.12021303280075134, "grad_norm": 0.1853368878364563, "learning_rate": 1e-06, "loss": 0.029, "step": 1288 }, { "clip_ratio/high_max": 0.0019395440285734367, "clip_ratio/high_mean": 0.0006901731085235951, "clip_ratio/low_mean": 0.0007944506596686551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014846237972960807, "epoch": 0.12030636590075192, "grad_norm": 0.12624883651733398, "learning_rate": 1e-06, "loss": 0.0699, "step": 1289 }, { "clip_ratio/high_max": 0.0022364586984622292, "clip_ratio/high_mean": 0.0008985355580080068, "clip_ratio/low_mean": 0.0009325439787062351, "clip_ratio/low_min": 0.000146565743307292, "clip_ratio/region_mean": 0.0018310795276192948, "epoch": 0.1203996990007525, "grad_norm": 0.11929628252983093, "learning_rate": 1e-06, "loss": 0.0377, "step": 1290 }, { "clip_ratio/high_max": 0.00248417829425307, "clip_ratio/high_mean": 0.0009137222423305502, "clip_ratio/low_mean": 0.0009736206557136029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018873429216910154, "epoch": 0.12049303210075309, "grad_norm": 0.11287343502044678, "learning_rate": 1e-06, "loss": 0.0326, "step": 1291 }, { "clip_ratio/high_max": 0.0019625572094810195, "clip_ratio/high_mean": 0.0007450074826920172, "clip_ratio/low_mean": 0.0010170580317208078, "clip_ratio/low_min": 8.558083754905965e-05, "clip_ratio/region_mean": 0.0017620654907659627, "epoch": 0.12058636520075366, "grad_norm": 0.11487818509340286, "learning_rate": 1e-06, "loss": 0.0153, "step": 1292 }, { "clip_ratio/high_max": 0.002077179447951494, "clip_ratio/high_mean": 0.00080206192433252, "clip_ratio/low_mean": 0.0010252702631987631, "clip_ratio/low_min": 8.367015288968105e-05, "clip_ratio/region_mean": 0.001827332191169262, "epoch": 0.12067969830075424, "grad_norm": 0.11239241808652878, "learning_rate": 1e-06, "loss": 0.0353, "step": 1293 }, { "clip_ratio/high_max": 0.002465377656335477, "clip_ratio/high_mean": 0.000920115138796973, "clip_ratio/low_mean": 0.0009740576242620591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018941727612400427, "epoch": 0.12077303140075484, "grad_norm": 0.11380409449338913, "learning_rate": 1e-06, "loss": 0.0114, "step": 1294 }, { "clip_ratio/high_max": 0.002330126349988859, "clip_ratio/high_mean": 0.0009716566182760289, "clip_ratio/low_mean": 0.000978649110038532, "clip_ratio/low_min": 5.766382582805818e-05, "clip_ratio/region_mean": 0.0019503056755638681, "epoch": 0.12086636450075541, "grad_norm": 0.11873907595872879, "learning_rate": 1e-06, "loss": 0.0209, "step": 1295 }, { "clip_ratio/high_max": 0.0023141233978094533, "clip_ratio/high_mean": 0.0009618253352527972, "clip_ratio/low_mean": 0.001166330632258905, "clip_ratio/low_min": 0.00019162415537721245, "clip_ratio/region_mean": 0.0021281559238559566, "epoch": 0.12095969760075599, "grad_norm": 0.12207131087779999, "learning_rate": 1e-06, "loss": 0.0916, "step": 1296 }, { "clip_ratio/high_max": 0.001938506571605103, "clip_ratio/high_mean": 0.0008880145978764631, "clip_ratio/low_mean": 0.0009675995497673284, "clip_ratio/low_min": 3.249285146011971e-05, "clip_ratio/region_mean": 0.0018556141731096432, "epoch": 0.12105303070075658, "grad_norm": 0.10858555138111115, "learning_rate": 1e-06, "loss": 0.023, "step": 1297 }, { "clip_ratio/high_max": 0.002441810844175052, "clip_ratio/high_mean": 0.0010267351590300677, "clip_ratio/low_mean": 0.000797704598880955, "clip_ratio/low_min": 5.952290121058468e-05, "clip_ratio/region_mean": 0.0018244397615490016, "epoch": 0.12114636380075716, "grad_norm": 0.13026843965053558, "learning_rate": 1e-06, "loss": -0.0014, "step": 1298 }, { "clip_ratio/high_max": 0.002252244605188025, "clip_ratio/high_mean": 0.0009366441827296512, "clip_ratio/low_mean": 0.0008289946645163582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017656388445175253, "epoch": 0.12123969690075775, "grad_norm": 0.10747717320919037, "learning_rate": 1e-06, "loss": 0.0268, "step": 1299 }, { "clip_ratio/high_max": 0.0024801314139040187, "clip_ratio/high_mean": 0.0010120064071088564, "clip_ratio/low_mean": 0.0009798703740671044, "clip_ratio/low_min": 7.715442734479439e-05, "clip_ratio/region_mean": 0.0019918768157367595, "epoch": 0.12133303000075833, "grad_norm": 0.12325803935527802, "learning_rate": 1e-06, "loss": 0.0242, "step": 1300 }, { "clip_ratio/high_max": 0.002263571754156146, "clip_ratio/high_mean": 0.0009484489419264719, "clip_ratio/low_mean": 0.0010041467139672022, "clip_ratio/low_min": 9.184146256302483e-05, "clip_ratio/region_mean": 0.0019525956304278225, "epoch": 0.12142636310075891, "grad_norm": 0.10906504839658737, "learning_rate": 1e-06, "loss": 0.063, "step": 1301 }, { "clip_ratio/high_max": 0.0026338738825870678, "clip_ratio/high_mean": 0.0010177921139984392, "clip_ratio/low_mean": 0.001031638255881262, "clip_ratio/low_min": 6.40285793451767e-05, "clip_ratio/region_mean": 0.0020494303535087965, "epoch": 0.1215196962007595, "grad_norm": 0.10667446255683899, "learning_rate": 1e-06, "loss": 0.0439, "step": 1302 }, { "clip_ratio/high_max": 0.0025169359578285366, "clip_ratio/high_mean": 0.0009745193019625731, "clip_ratio/low_mean": 0.0009110706087085418, "clip_ratio/low_min": 5.024115671403706e-05, "clip_ratio/region_mean": 0.0018855899252230301, "epoch": 0.12161302930076008, "grad_norm": 0.11458733677864075, "learning_rate": 1e-06, "loss": 0.0413, "step": 1303 }, { "clip_ratio/high_max": 0.002306732454599114, "clip_ratio/high_mean": 0.0010140952526853653, "clip_ratio/low_mean": 0.0008844761159707559, "clip_ratio/low_min": 5.024313759349752e-05, "clip_ratio/region_mean": 0.001898571390483994, "epoch": 0.12170636240076066, "grad_norm": 0.11792048811912537, "learning_rate": 1e-06, "loss": 0.0082, "step": 1304 }, { "clip_ratio/high_max": 0.002436463786580134, "clip_ratio/high_mean": 0.0010538268361415248, "clip_ratio/low_mean": 0.0009685346358310198, "clip_ratio/low_min": 1.5679879652452655e-05, "clip_ratio/region_mean": 0.0020223614701535553, "epoch": 0.12179969550076125, "grad_norm": 0.10799770057201385, "learning_rate": 1e-06, "loss": -0.0223, "step": 1305 }, { "clip_ratio/high_max": 0.0023058362348820083, "clip_ratio/high_mean": 0.0009102488857024582, "clip_ratio/low_mean": 0.000975988652498927, "clip_ratio/low_min": 8.764481026446447e-05, "clip_ratio/region_mean": 0.0018862375291064382, "epoch": 0.12189302860076183, "grad_norm": 0.11240687221288681, "learning_rate": 1e-06, "loss": 0.0533, "step": 1306 }, { "clip_ratio/high_max": 0.002587798604508862, "clip_ratio/high_mean": 0.0010444677336636232, "clip_ratio/low_mean": 0.0010610118843032978, "clip_ratio/low_min": 3.502708932501264e-05, "clip_ratio/region_mean": 0.0021054796015960164, "epoch": 0.12198636170076241, "grad_norm": 0.11873338371515274, "learning_rate": 1e-06, "loss": 0.0419, "step": 1307 }, { "clip_ratio/high_max": 0.002238122084236238, "clip_ratio/high_mean": 0.0009666623318480561, "clip_ratio/low_mean": 0.0010871769409277476, "clip_ratio/low_min": 5.963993498880882e-05, "clip_ratio/region_mean": 0.002053839292784687, "epoch": 0.122079694800763, "grad_norm": 0.10468167811632156, "learning_rate": 1e-06, "loss": 0.0155, "step": 1308 }, { "clip_ratio/high_max": 0.0020813558730878867, "clip_ratio/high_mean": 0.0009170425855700159, "clip_ratio/low_mean": 0.00087155876099132, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017886013665702194, "epoch": 0.12217302790076358, "grad_norm": 0.28226950764656067, "learning_rate": 1e-06, "loss": -0.0454, "step": 1309 }, { "clip_ratio/high_max": 0.0025746532119228505, "clip_ratio/high_mean": 0.001176062723970972, "clip_ratio/low_mean": 0.0009494975365669234, "clip_ratio/low_min": 1.5875031749601476e-05, "clip_ratio/region_mean": 0.0021255602769088, "epoch": 0.12226636100076417, "grad_norm": 0.11189974099397659, "learning_rate": 1e-06, "loss": -0.0338, "step": 1310 }, { "clip_ratio/high_max": 0.0022610522937611677, "clip_ratio/high_mean": 0.0009601425845175982, "clip_ratio/low_mean": 0.0011916314469999634, "clip_ratio/low_min": 6.783997559978161e-05, "clip_ratio/region_mean": 0.0021517740169656463, "epoch": 0.12235969410076475, "grad_norm": 0.1321498602628708, "learning_rate": 1e-06, "loss": 0.0396, "step": 1311 }, { "clip_ratio/high_max": 0.0026759172178572044, "clip_ratio/high_mean": 0.001046733363182284, "clip_ratio/low_mean": 0.0011467236690805294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021934570249868557, "epoch": 0.12245302720076533, "grad_norm": 0.11425404995679855, "learning_rate": 1e-06, "loss": 0.0316, "step": 1312 }, { "clip_ratio/high_max": 0.003096200482104905, "clip_ratio/high_mean": 0.0012247566446603741, "clip_ratio/low_mean": 0.0009124176203840761, "clip_ratio/low_min": 3.974411629315e-05, "clip_ratio/region_mean": 0.0021371742550400086, "epoch": 0.12254636030076592, "grad_norm": 0.11611877381801605, "learning_rate": 1e-06, "loss": -0.0697, "step": 1313 }, { "clip_ratio/high_max": 0.0021622707026835997, "clip_ratio/high_mean": 0.0009445622126804665, "clip_ratio/low_mean": 0.0010839642454811838, "clip_ratio/low_min": 6.705146370222792e-05, "clip_ratio/region_mean": 0.0020285264326957986, "epoch": 0.1226396934007665, "grad_norm": 0.10304926335811615, "learning_rate": 1e-06, "loss": 0.0155, "step": 1314 }, { "clip_ratio/high_max": 0.0023880083099356852, "clip_ratio/high_mean": 0.0011002809715137118, "clip_ratio/low_mean": 0.00112236069253413, "clip_ratio/low_min": 9.272780152969062e-05, "clip_ratio/region_mean": 0.00222264166950481, "epoch": 0.12273302650076708, "grad_norm": 0.11256153881549835, "learning_rate": 1e-06, "loss": 0.0183, "step": 1315 }, { "clip_ratio/high_max": 0.0027116526107420214, "clip_ratio/high_mean": 0.0011230716700083576, "clip_ratio/low_mean": 0.0009181561144941952, "clip_ratio/low_min": 4.2745424252643716e-05, "clip_ratio/region_mean": 0.002041227839072235, "epoch": 0.12282635960076767, "grad_norm": 0.11524578183889389, "learning_rate": 1e-06, "loss": -0.0165, "step": 1316 }, { "clip_ratio/high_max": 0.0024223047657869756, "clip_ratio/high_mean": 0.0010719097845139913, "clip_ratio/low_mean": 0.0011505577494972385, "clip_ratio/low_min": 0.00012231938217155403, "clip_ratio/region_mean": 0.002222467490355484, "epoch": 0.12291969270076825, "grad_norm": 0.12907569110393524, "learning_rate": 1e-06, "loss": 0.0264, "step": 1317 }, { "clip_ratio/high_max": 0.0030504561436828226, "clip_ratio/high_mean": 0.0014051681791897863, "clip_ratio/low_mean": 0.000994932164758211, "clip_ratio/low_min": 5.6225260777864605e-05, "clip_ratio/region_mean": 0.0024001003766898066, "epoch": 0.12301302580076884, "grad_norm": 0.17135463654994965, "learning_rate": 1e-06, "loss": -0.0574, "step": 1318 }, { "clip_ratio/high_max": 0.0025851281607174315, "clip_ratio/high_mean": 0.0010976346493407618, "clip_ratio/low_mean": 0.0010628374002408236, "clip_ratio/low_min": 3.79289504053304e-05, "clip_ratio/region_mean": 0.00216047209687531, "epoch": 0.12310635890076942, "grad_norm": 0.203459694981575, "learning_rate": 1e-06, "loss": -0.0079, "step": 1319 }, { "clip_ratio/high_max": 0.002792992665490601, "clip_ratio/high_mean": 0.0011231363314436749, "clip_ratio/low_mean": 0.0008654042858324829, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019885405999957584, "epoch": 0.12319969200077, "grad_norm": 0.10876962542533875, "learning_rate": 1e-06, "loss": -0.0287, "step": 1320 }, { "clip_ratio/high_max": 0.0026682554234866984, "clip_ratio/high_mean": 0.0010714533273130655, "clip_ratio/low_mean": 0.0011184101076651132, "clip_ratio/low_min": 3.549926077539567e-05, "clip_ratio/region_mean": 0.002189863436797168, "epoch": 0.12329302510077059, "grad_norm": 0.11900845915079117, "learning_rate": 1e-06, "loss": 0.0228, "step": 1321 }, { "clip_ratio/high_max": 0.002558654232416302, "clip_ratio/high_mean": 0.0009916140752466163, "clip_ratio/low_mean": 0.0010297972567059333, "clip_ratio/low_min": 1.213356608786853e-05, "clip_ratio/region_mean": 0.002021411324676592, "epoch": 0.12338635820077117, "grad_norm": 0.1073676273226738, "learning_rate": 1e-06, "loss": 0.003, "step": 1322 }, { "clip_ratio/high_max": 0.0022912902059033513, "clip_ratio/high_mean": 0.0009251749106624629, "clip_ratio/low_mean": 0.001135120135586476, "clip_ratio/low_min": 0.00013762794424110325, "clip_ratio/region_mean": 0.002060295082628727, "epoch": 0.12347969130077174, "grad_norm": 0.10651320964097977, "learning_rate": 1e-06, "loss": 0.0268, "step": 1323 }, { "clip_ratio/high_max": 0.002627836300234776, "clip_ratio/high_mean": 0.0011033288683393039, "clip_ratio/low_mean": 0.0011243516546528554, "clip_ratio/low_min": 6.528768608404789e-05, "clip_ratio/region_mean": 0.002227680539363064, "epoch": 0.12357302440077234, "grad_norm": 0.11595786362886429, "learning_rate": 1e-06, "loss": -0.0092, "step": 1324 }, { "clip_ratio/high_max": 0.0026489586161915213, "clip_ratio/high_mean": 0.0010106006066052942, "clip_ratio/low_mean": 0.001140249964009854, "clip_ratio/low_min": 6.696042328258045e-05, "clip_ratio/region_mean": 0.002150850610632915, "epoch": 0.12366635750077291, "grad_norm": 0.12186706066131592, "learning_rate": 1e-06, "loss": 0.032, "step": 1325 }, { "clip_ratio/high_max": 0.0028390686929924414, "clip_ratio/high_mean": 0.0011158923498442164, "clip_ratio/low_mean": 0.0010702023264457239, "clip_ratio/low_min": 9.896701521938667e-05, "clip_ratio/region_mean": 0.0021860946581000462, "epoch": 0.12375969060077349, "grad_norm": 0.11678916215896606, "learning_rate": 1e-06, "loss": 0.0145, "step": 1326 }, { "clip_ratio/high_max": 0.002581896107585635, "clip_ratio/high_mean": 0.0011386568912712391, "clip_ratio/low_mean": 0.001268626656383276, "clip_ratio/low_min": 9.548227535560727e-05, "clip_ratio/region_mean": 0.002407283529464621, "epoch": 0.12385302370077408, "grad_norm": 0.15045490860939026, "learning_rate": 1e-06, "loss": 0.0264, "step": 1327 }, { "clip_ratio/high_max": 0.002736391448706854, "clip_ratio/high_mean": 0.0010880596546485322, "clip_ratio/low_mean": 0.0011997807741863653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002287840448843781, "epoch": 0.12394635680077466, "grad_norm": 0.11760014295578003, "learning_rate": 1e-06, "loss": 0.0395, "step": 1328 }, { "clip_ratio/high_max": 0.002846799194230698, "clip_ratio/high_mean": 0.0012203809965285473, "clip_ratio/low_mean": 0.0012402048487274442, "clip_ratio/low_min": 2.7209867766941898e-05, "clip_ratio/region_mean": 0.0024605858488939703, "epoch": 0.12403968990077525, "grad_norm": 0.13172033429145813, "learning_rate": 1e-06, "loss": -0.0044, "step": 1329 }, { "clip_ratio/high_max": 0.0025657784717623144, "clip_ratio/high_mean": 0.001073271872883197, "clip_ratio/low_mean": 0.0010268364057992585, "clip_ratio/low_min": 3.929303238692228e-05, "clip_ratio/region_mean": 0.0021001082786824554, "epoch": 0.12413302300077583, "grad_norm": 0.11676289886236191, "learning_rate": 1e-06, "loss": -0.002, "step": 1330 }, { "clip_ratio/high_max": 0.0024948178688646294, "clip_ratio/high_mean": 0.0010577664543234278, "clip_ratio/low_mean": 0.001128869207605021, "clip_ratio/low_min": 0.00012567300018417882, "clip_ratio/region_mean": 0.0021866356328246184, "epoch": 0.12422635610077641, "grad_norm": 0.11228536814451218, "learning_rate": 1e-06, "loss": 0.0427, "step": 1331 }, { "clip_ratio/high_max": 0.0026571896523819305, "clip_ratio/high_mean": 0.0011416672205086797, "clip_ratio/low_mean": 0.0011031921858375426, "clip_ratio/low_min": 0.00010886806194321252, "clip_ratio/region_mean": 0.0022448594390880316, "epoch": 0.124319689200777, "grad_norm": 0.1288970559835434, "learning_rate": 1e-06, "loss": 0.0124, "step": 1332 }, { "clip_ratio/high_max": 0.0025320590284536593, "clip_ratio/high_mean": 0.0011570885690161958, "clip_ratio/low_mean": 0.001041901847202098, "clip_ratio/low_min": 6.738956108165439e-05, "clip_ratio/region_mean": 0.002198990448960103, "epoch": 0.12441302230077758, "grad_norm": 0.1124814823269844, "learning_rate": 1e-06, "loss": 0.0224, "step": 1333 }, { "clip_ratio/high_max": 0.0026128546305699274, "clip_ratio/high_mean": 0.0009582051643519662, "clip_ratio/low_mean": 0.0013204697515902808, "clip_ratio/low_min": 3.88923472200986e-05, "clip_ratio/region_mean": 0.0022786749395891093, "epoch": 0.12450635540077816, "grad_norm": 0.12329450994729996, "learning_rate": 1e-06, "loss": 0.0299, "step": 1334 }, { "clip_ratio/high_max": 0.0023214472603285685, "clip_ratio/high_mean": 0.0010030869634647388, "clip_ratio/low_mean": 0.0011128095429739915, "clip_ratio/low_min": 2.2425547285820358e-05, "clip_ratio/region_mean": 0.0021158965246286243, "epoch": 0.12459968850077875, "grad_norm": 0.11730413138866425, "learning_rate": 1e-06, "loss": 0.0309, "step": 1335 }, { "clip_ratio/high_max": 0.002982100468216231, "clip_ratio/high_mean": 0.0011101754644187167, "clip_ratio/low_mean": 0.001293740890105255, "clip_ratio/low_min": 0.00015990870997484308, "clip_ratio/region_mean": 0.002403916332696099, "epoch": 0.12469302160077933, "grad_norm": 0.1133042648434639, "learning_rate": 1e-06, "loss": 0.0572, "step": 1336 }, { "clip_ratio/high_max": 0.003016605398443062, "clip_ratio/high_mean": 0.001397381358401617, "clip_ratio/low_mean": 0.0010754951090348186, "clip_ratio/low_min": 2.4752474928391166e-05, "clip_ratio/region_mean": 0.002472876527463086, "epoch": 0.12478635470077991, "grad_norm": 0.1099381148815155, "learning_rate": 1e-06, "loss": -0.0368, "step": 1337 }, { "clip_ratio/high_max": 0.002582431581686251, "clip_ratio/high_mean": 0.0010401414583611768, "clip_ratio/low_mean": 0.0011927328123420011, "clip_ratio/low_min": 8.804158096609171e-05, "clip_ratio/region_mean": 0.002232874285255093, "epoch": 0.1248796878007805, "grad_norm": 0.11148440837860107, "learning_rate": 1e-06, "loss": 0.0108, "step": 1338 }, { "clip_ratio/high_max": 0.00257693241655943, "clip_ratio/high_mean": 0.001114319193220581, "clip_ratio/low_mean": 0.001178469881779165, "clip_ratio/low_min": 5.861662430106662e-05, "clip_ratio/region_mean": 0.0022927890604478307, "epoch": 0.12497302090078108, "grad_norm": 0.11407836526632309, "learning_rate": 1e-06, "loss": -0.0098, "step": 1339 }, { "clip_ratio/high_max": 0.0026051498207380064, "clip_ratio/high_mean": 0.00100246686997707, "clip_ratio/low_mean": 0.0010983349638991058, "clip_ratio/low_min": 1.4856192137813196e-05, "clip_ratio/region_mean": 0.0021008018447901122, "epoch": 0.12506635400078167, "grad_norm": 0.13212084770202637, "learning_rate": 1e-06, "loss": 0.0502, "step": 1340 }, { "clip_ratio/high_max": 0.002599393861601129, "clip_ratio/high_mean": 0.0011785814094764646, "clip_ratio/low_mean": 0.0012800695167243248, "clip_ratio/low_min": 2.0906680219923146e-05, "clip_ratio/region_mean": 0.002458650960761588, "epoch": 0.12515968710078224, "grad_norm": 0.11319679021835327, "learning_rate": 1e-06, "loss": 0.047, "step": 1341 }, { "clip_ratio/high_max": 0.0027756354538723826, "clip_ratio/high_mean": 0.0012592410421348177, "clip_ratio/low_mean": 0.0011250367824686691, "clip_ratio/low_min": 0.0001464491397200618, "clip_ratio/region_mean": 0.002384277780947741, "epoch": 0.12525302020078283, "grad_norm": 0.12402558326721191, "learning_rate": 1e-06, "loss": 0.013, "step": 1342 }, { "clip_ratio/high_max": 0.0027977512509096414, "clip_ratio/high_mean": 0.0012480715158744715, "clip_ratio/low_mean": 0.001040811019265675, "clip_ratio/low_min": 3.241995727876201e-05, "clip_ratio/region_mean": 0.0022888825405971147, "epoch": 0.12534635330078342, "grad_norm": 0.10401278734207153, "learning_rate": 1e-06, "loss": 0.0065, "step": 1343 }, { "clip_ratio/high_max": 0.002564290538430214, "clip_ratio/high_mean": 0.0010519442257646006, "clip_ratio/low_mean": 0.0012405525976646459, "clip_ratio/low_min": 8.702085415279726e-05, "clip_ratio/region_mean": 0.002292496785230469, "epoch": 0.125439686400784, "grad_norm": 0.12406987696886063, "learning_rate": 1e-06, "loss": 0.0553, "step": 1344 }, { "clip_ratio/high_max": 0.002811659906001296, "clip_ratio/high_mean": 0.001144557125371648, "clip_ratio/low_mean": 0.0011629606287897332, "clip_ratio/low_min": 5.1769700803561136e-05, "clip_ratio/region_mean": 0.002307517708686646, "epoch": 0.12553301950078458, "grad_norm": 0.7004154324531555, "learning_rate": 1e-06, "loss": 0.0141, "step": 1345 }, { "clip_ratio/high_max": 0.002468482041876996, "clip_ratio/high_mean": 0.0010542313702899264, "clip_ratio/low_mean": 0.0012123147243983112, "clip_ratio/low_min": 0.00010982124877045862, "clip_ratio/region_mean": 0.002266546136524994, "epoch": 0.12562635260078517, "grad_norm": 0.11669385433197021, "learning_rate": 1e-06, "loss": 0.0351, "step": 1346 }, { "clip_ratio/high_max": 0.0033084197420976125, "clip_ratio/high_mean": 0.001157682894699974, "clip_ratio/low_mean": 0.001303948385611875, "clip_ratio/low_min": 1.775568125594873e-05, "clip_ratio/region_mean": 0.0024616312512080185, "epoch": 0.12571968570078576, "grad_norm": 0.12311021238565445, "learning_rate": 1e-06, "loss": 0.0588, "step": 1347 }, { "clip_ratio/high_max": 0.0027056929393438622, "clip_ratio/high_mean": 0.00103837785354699, "clip_ratio/low_mean": 0.0013868059759261087, "clip_ratio/low_min": 0.00017004779147100635, "clip_ratio/region_mean": 0.0024251838476629928, "epoch": 0.12581301880078632, "grad_norm": 0.11250880360603333, "learning_rate": 1e-06, "loss": 0.0258, "step": 1348 }, { "clip_ratio/high_max": 0.0029295459971763194, "clip_ratio/high_mean": 0.0010823124976013787, "clip_ratio/low_mean": 0.0012509830557974055, "clip_ratio/low_min": 6.736807517881971e-05, "clip_ratio/region_mean": 0.002333295517018996, "epoch": 0.12590635190078692, "grad_norm": 0.11140339821577072, "learning_rate": 1e-06, "loss": 0.0555, "step": 1349 }, { "clip_ratio/high_max": 0.0024087074416456744, "clip_ratio/high_mean": 0.0011355404021742288, "clip_ratio/low_mean": 0.0012424959204508923, "clip_ratio/low_min": 4.464145695237676e-05, "clip_ratio/region_mean": 0.0023780363189871423, "epoch": 0.1259996850007875, "grad_norm": 0.11859416961669922, "learning_rate": 1e-06, "loss": 0.0102, "step": 1350 }, { "clip_ratio/high_max": 0.0030166213764459826, "clip_ratio/high_mean": 0.001319939794484526, "clip_ratio/low_mean": 0.0012165483130957, "clip_ratio/low_min": 8.977844845503569e-05, "clip_ratio/region_mean": 0.0025364881366840564, "epoch": 0.12609301810078807, "grad_norm": 0.12291578948497772, "learning_rate": 1e-06, "loss": 0.0153, "step": 1351 }, { "clip_ratio/high_max": 0.0028500072367023677, "clip_ratio/high_mean": 0.0012187771644676104, "clip_ratio/low_mean": 0.0011386931510060094, "clip_ratio/low_min": 7.347841528826393e-05, "clip_ratio/region_mean": 0.0023574703227495775, "epoch": 0.12618635120078867, "grad_norm": 0.11126212030649185, "learning_rate": 1e-06, "loss": 0.0068, "step": 1352 }, { "clip_ratio/high_max": 0.002567450312199071, "clip_ratio/high_mean": 0.0011300108053546865, "clip_ratio/low_mean": 0.0011173318707733415, "clip_ratio/low_min": 4.81106253573671e-05, "clip_ratio/region_mean": 0.002247342694317922, "epoch": 0.12627968430078926, "grad_norm": 0.11562218517065048, "learning_rate": 1e-06, "loss": 0.0556, "step": 1353 }, { "clip_ratio/high_max": 0.0024769176670815796, "clip_ratio/high_mean": 0.0010596116371743847, "clip_ratio/low_mean": 0.0012322688598942477, "clip_ratio/low_min": 8.583437556808349e-05, "clip_ratio/region_mean": 0.0022918804534128867, "epoch": 0.12637301740078982, "grad_norm": 0.11048205941915512, "learning_rate": 1e-06, "loss": 0.0201, "step": 1354 }, { "clip_ratio/high_max": 0.003350231003423687, "clip_ratio/high_mean": 0.0013644994469359517, "clip_ratio/low_mean": 0.001189818680359167, "clip_ratio/low_min": 0.00012383401372062508, "clip_ratio/region_mean": 0.0025543181182001717, "epoch": 0.12646635050079041, "grad_norm": 0.10970748215913773, "learning_rate": 1e-06, "loss": 0.0236, "step": 1355 }, { "clip_ratio/high_max": 0.0025176360650220886, "clip_ratio/high_mean": 0.0011429897749621887, "clip_ratio/low_mean": 0.0010753201167972293, "clip_ratio/low_min": 7.076898509694729e-05, "clip_ratio/region_mean": 0.002218309928139206, "epoch": 0.126559683600791, "grad_norm": 0.11352907866239548, "learning_rate": 1e-06, "loss": -0.0035, "step": 1356 }, { "clip_ratio/high_max": 0.0023531259030278306, "clip_ratio/high_mean": 0.000930058822632418, "clip_ratio/low_mean": 0.0013118899005348794, "clip_ratio/low_min": 0.00024105198644974735, "clip_ratio/region_mean": 0.0022419487650040537, "epoch": 0.12665301670079157, "grad_norm": 0.12025987356901169, "learning_rate": 1e-06, "loss": 0.0609, "step": 1357 }, { "clip_ratio/high_max": 0.003036089015949983, "clip_ratio/high_mean": 0.0013386768150667194, "clip_ratio/low_mean": 0.0012784345708496403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002617111320432741, "epoch": 0.12674634980079216, "grad_norm": 0.12024588137865067, "learning_rate": 1e-06, "loss": 0.0641, "step": 1358 }, { "clip_ratio/high_max": 0.0031052927297423594, "clip_ratio/high_mean": 0.001301228203374194, "clip_ratio/low_mean": 0.0010590280271571828, "clip_ratio/low_min": 0.00010009702145907795, "clip_ratio/region_mean": 0.0023602563160238788, "epoch": 0.12683968290079276, "grad_norm": 0.11043490469455719, "learning_rate": 1e-06, "loss": 0.0179, "step": 1359 }, { "clip_ratio/high_max": 0.0034211751553812064, "clip_ratio/high_mean": 0.0014332157734315842, "clip_ratio/low_mean": 0.001094557130272733, "clip_ratio/low_min": 9.345641774416436e-05, "clip_ratio/region_mean": 0.002527772892790381, "epoch": 0.12693301600079332, "grad_norm": 0.10914299637079239, "learning_rate": 1e-06, "loss": -0.0266, "step": 1360 }, { "clip_ratio/high_max": 0.0026519842140260153, "clip_ratio/high_mean": 0.0012206122300995048, "clip_ratio/low_mean": 0.0013386475147854071, "clip_ratio/low_min": 0.00025493222619843436, "clip_ratio/region_mean": 0.0025592597739887424, "epoch": 0.1270263491007939, "grad_norm": 0.1634318232536316, "learning_rate": 1e-06, "loss": 0.0312, "step": 1361 }, { "clip_ratio/high_max": 0.002511231454263907, "clip_ratio/high_mean": 0.0010970595558319474, "clip_ratio/low_mean": 0.00125238616601564, "clip_ratio/low_min": 4.7654333684477024e-05, "clip_ratio/region_mean": 0.002349445734580513, "epoch": 0.1271196822007945, "grad_norm": 0.10162568092346191, "learning_rate": 1e-06, "loss": 0.0267, "step": 1362 }, { "clip_ratio/high_max": 0.0026206374532193877, "clip_ratio/high_mean": 0.0012008247031189967, "clip_ratio/low_mean": 0.0012865560420323163, "clip_ratio/low_min": 3.811750502791256e-05, "clip_ratio/region_mean": 0.002487380821548868, "epoch": 0.12721301530079507, "grad_norm": 0.11069579422473907, "learning_rate": 1e-06, "loss": 0.0313, "step": 1363 }, { "clip_ratio/high_max": 0.0033561226882738993, "clip_ratio/high_mean": 0.001254858885658905, "clip_ratio/low_mean": 0.0013296573870320572, "clip_ratio/low_min": 2.5730752895469777e-05, "clip_ratio/region_mean": 0.0025845162381301634, "epoch": 0.12730634840079566, "grad_norm": 0.11410504579544067, "learning_rate": 1e-06, "loss": 0.0341, "step": 1364 }, { "clip_ratio/high_max": 0.002540686691645533, "clip_ratio/high_mean": 0.0011224654554098379, "clip_ratio/low_mean": 0.0013588600777438842, "clip_ratio/low_min": 2.04381940420717e-05, "clip_ratio/region_mean": 0.002481325536791701, "epoch": 0.12739968150079625, "grad_norm": 0.11087482422590256, "learning_rate": 1e-06, "loss": 0.0334, "step": 1365 }, { "clip_ratio/high_max": 0.002916283396189101, "clip_ratio/high_mean": 0.001222200724441791, "clip_ratio/low_mean": 0.001237593342011678, "clip_ratio/low_min": 7.867049771448364e-05, "clip_ratio/region_mean": 0.0024597941010142677, "epoch": 0.12749301460079684, "grad_norm": 0.11939911544322968, "learning_rate": 1e-06, "loss": -0.0276, "step": 1366 }, { "clip_ratio/high_max": 0.0029829672203049995, "clip_ratio/high_mean": 0.0013271419102238724, "clip_ratio/low_mean": 0.0014972758472140413, "clip_ratio/low_min": 0.00010067838593386114, "clip_ratio/region_mean": 0.0028244178029126488, "epoch": 0.1275863477007974, "grad_norm": 0.12010502815246582, "learning_rate": 1e-06, "loss": 0.0289, "step": 1367 }, { "clip_ratio/high_max": 0.0027923289526370354, "clip_ratio/high_mean": 0.0012110241623304319, "clip_ratio/low_mean": 0.0013404868750512833, "clip_ratio/low_min": 8.446245647064643e-05, "clip_ratio/region_mean": 0.0025515109882690012, "epoch": 0.127679680800798, "grad_norm": 0.10742495954036713, "learning_rate": 1e-06, "loss": 0.0024, "step": 1368 }, { "clip_ratio/high_max": 0.00304395620332798, "clip_ratio/high_mean": 0.0012475155999709386, "clip_ratio/low_mean": 0.0014075656727072783, "clip_ratio/low_min": 8.948659069574205e-05, "clip_ratio/region_mean": 0.0026550812617642805, "epoch": 0.1277730139007986, "grad_norm": 0.11154259741306305, "learning_rate": 1e-06, "loss": 0.0516, "step": 1369 }, { "clip_ratio/high_max": 0.0030153183033689857, "clip_ratio/high_mean": 0.0012760466379404534, "clip_ratio/low_mean": 0.0013798076870443765, "clip_ratio/low_min": 8.117053585010581e-05, "clip_ratio/region_mean": 0.002655854288605042, "epoch": 0.12786634700079916, "grad_norm": 0.13308484852313995, "learning_rate": 1e-06, "loss": 0.019, "step": 1370 }, { "clip_ratio/high_max": 0.0025975142198149115, "clip_ratio/high_mean": 0.0011475003520899918, "clip_ratio/low_mean": 0.0013063758487987798, "clip_ratio/low_min": 0.00011564903252292424, "clip_ratio/region_mean": 0.002453876171784941, "epoch": 0.12795968010079975, "grad_norm": 0.10471636056900024, "learning_rate": 1e-06, "loss": 0.0376, "step": 1371 }, { "clip_ratio/high_max": 0.0031849707884248346, "clip_ratio/high_mean": 0.0013211326076998375, "clip_ratio/low_mean": 0.00129524042858975, "clip_ratio/low_min": 3.910819395969156e-05, "clip_ratio/region_mean": 0.0026163729853578843, "epoch": 0.12805301320080034, "grad_norm": 0.11647000908851624, "learning_rate": 1e-06, "loss": -0.0217, "step": 1372 }, { "clip_ratio/high_max": 0.0031562722797389142, "clip_ratio/high_mean": 0.0011864872503792867, "clip_ratio/low_mean": 0.0011247345064475667, "clip_ratio/low_min": 4.94201376568526e-05, "clip_ratio/region_mean": 0.0023112217750167474, "epoch": 0.1281463463008009, "grad_norm": 0.1003836840391159, "learning_rate": 1e-06, "loss": 0.0153, "step": 1373 }, { "clip_ratio/high_max": 0.002775829896563664, "clip_ratio/high_mean": 0.0011705961587722413, "clip_ratio/low_mean": 0.0013226117662270553, "clip_ratio/low_min": 3.2834253943292424e-05, "clip_ratio/region_mean": 0.0024932079613790847, "epoch": 0.1282396794008015, "grad_norm": 0.11304599046707153, "learning_rate": 1e-06, "loss": 0.0388, "step": 1374 }, { "clip_ratio/high_max": 0.0027772734611062333, "clip_ratio/high_mean": 0.0012227170627738815, "clip_ratio/low_mean": 0.0012744320702040568, "clip_ratio/low_min": 3.273536640335806e-05, "clip_ratio/region_mean": 0.0024971491293399595, "epoch": 0.1283330125008021, "grad_norm": 0.11063316464424133, "learning_rate": 1e-06, "loss": 0.0009, "step": 1375 }, { "clip_ratio/high_max": 0.0028856489661848173, "clip_ratio/high_mean": 0.0011260449900873937, "clip_ratio/low_mean": 0.0014833531822660007, "clip_ratio/low_min": 0.0001267631014343351, "clip_ratio/region_mean": 0.0026093981650774367, "epoch": 0.12842634560080265, "grad_norm": 0.11840921640396118, "learning_rate": 1e-06, "loss": 0.0263, "step": 1376 }, { "clip_ratio/high_max": 0.0031462571350857615, "clip_ratio/high_mean": 0.0013985479417897295, "clip_ratio/low_mean": 0.001192144689412089, "clip_ratio/low_min": 1.0822510375874117e-05, "clip_ratio/region_mean": 0.0025906926312018186, "epoch": 0.12851967870080325, "grad_norm": 0.11033712327480316, "learning_rate": 1e-06, "loss": -0.0352, "step": 1377 }, { "clip_ratio/high_max": 0.002931233451818116, "clip_ratio/high_mean": 0.0012987254085601307, "clip_ratio/low_mean": 0.0013331221762200585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026318475356674753, "epoch": 0.12861301180080384, "grad_norm": 0.11512207239866257, "learning_rate": 1e-06, "loss": 0.0327, "step": 1378 }, { "clip_ratio/high_max": 0.0031872494218987413, "clip_ratio/high_mean": 0.0012775353898177855, "clip_ratio/low_mean": 0.0015626978092768695, "clip_ratio/low_min": 0.00022127443935460178, "clip_ratio/region_mean": 0.0028402332463883795, "epoch": 0.1287063449008044, "grad_norm": 0.12515591084957123, "learning_rate": 1e-06, "loss": 0.0323, "step": 1379 }, { "clip_ratio/high_max": 0.0031116569152800366, "clip_ratio/high_mean": 0.0013314692987478338, "clip_ratio/low_mean": 0.0014008225152792875, "clip_ratio/low_min": 0.0002107655855070334, "clip_ratio/region_mean": 0.0027322918467689306, "epoch": 0.128799678000805, "grad_norm": 0.12675832211971283, "learning_rate": 1e-06, "loss": 0.0075, "step": 1380 }, { "clip_ratio/high_max": 0.0027347295108484104, "clip_ratio/high_mean": 0.0012163412320660427, "clip_ratio/low_mean": 0.0012534443085314706, "clip_ratio/low_min": 2.838921864167787e-05, "clip_ratio/region_mean": 0.0024697855187696405, "epoch": 0.1288930111008056, "grad_norm": 0.12454894185066223, "learning_rate": 1e-06, "loss": -0.0154, "step": 1381 }, { "clip_ratio/high_max": 0.0025226335528714117, "clip_ratio/high_mean": 0.0011859914447995834, "clip_ratio/low_mean": 0.0012439866768545471, "clip_ratio/low_min": 6.472757559095044e-05, "clip_ratio/region_mean": 0.0024299781580339186, "epoch": 0.12898634420080615, "grad_norm": 0.11575935035943985, "learning_rate": 1e-06, "loss": 0.0057, "step": 1382 }, { "clip_ratio/high_max": 0.0022339663264574483, "clip_ratio/high_mean": 0.0009837511533987708, "clip_ratio/low_mean": 0.00159249584612553, "clip_ratio/low_min": 0.00019201983741368167, "clip_ratio/region_mean": 0.002576246981334407, "epoch": 0.12907967730080674, "grad_norm": 0.10702164471149445, "learning_rate": 1e-06, "loss": 0.0394, "step": 1383 }, { "clip_ratio/high_max": 0.0028238455415703356, "clip_ratio/high_mean": 0.0011758272848965134, "clip_ratio/low_mean": 0.0015015920616860967, "clip_ratio/low_min": 0.0001720309282973176, "clip_ratio/region_mean": 0.0026774193829623982, "epoch": 0.12917301040080734, "grad_norm": 0.1264890730381012, "learning_rate": 1e-06, "loss": 0.0192, "step": 1384 }, { "clip_ratio/high_max": 0.0032332927439711057, "clip_ratio/high_mean": 0.0014099479194555897, "clip_ratio/low_mean": 0.001346737288258737, "clip_ratio/low_min": 4.019809784949757e-05, "clip_ratio/region_mean": 0.002756685236818157, "epoch": 0.1292663435008079, "grad_norm": 0.11923909187316895, "learning_rate": 1e-06, "loss": 0.014, "step": 1385 }, { "clip_ratio/high_max": 0.0024510126022505574, "clip_ratio/high_mean": 0.0011480527973617427, "clip_ratio/low_mean": 0.0013162765717424918, "clip_ratio/low_min": 1.9516413885867223e-05, "clip_ratio/region_mean": 0.0024643293290864676, "epoch": 0.1293596766008085, "grad_norm": 0.12254194915294647, "learning_rate": 1e-06, "loss": 0.0514, "step": 1386 }, { "clip_ratio/high_max": 0.002646388740686234, "clip_ratio/high_mean": 0.0012281917806831188, "clip_ratio/low_mean": 0.0011648471627268009, "clip_ratio/low_min": 2.7028974727727473e-05, "clip_ratio/region_mean": 0.0023930389288580045, "epoch": 0.12945300970080909, "grad_norm": 0.10550342500209808, "learning_rate": 1e-06, "loss": 0.0329, "step": 1387 }, { "clip_ratio/high_max": 0.002958007316919975, "clip_ratio/high_mean": 0.0012395266312523745, "clip_ratio/low_mean": 0.001470461535063805, "clip_ratio/low_min": 0.00012581130067701451, "clip_ratio/region_mean": 0.002709988140850328, "epoch": 0.12954634280080968, "grad_norm": 0.1273244470357895, "learning_rate": 1e-06, "loss": 0.0328, "step": 1388 }, { "clip_ratio/high_max": 0.003077952773310244, "clip_ratio/high_mean": 0.0012885983851447236, "clip_ratio/low_mean": 0.0010589942867227364, "clip_ratio/low_min": 2.6150628400500864e-05, "clip_ratio/region_mean": 0.0023475926427636296, "epoch": 0.12963967590081024, "grad_norm": 0.10528501123189926, "learning_rate": 1e-06, "loss": -0.0534, "step": 1389 }, { "clip_ratio/high_max": 0.0023956300283316523, "clip_ratio/high_mean": 0.0010605086536088493, "clip_ratio/low_mean": 0.0013184277559048496, "clip_ratio/low_min": 0.00012944380068802275, "clip_ratio/region_mean": 0.0023789363985997625, "epoch": 0.12973300900081083, "grad_norm": 0.11130145192146301, "learning_rate": 1e-06, "loss": 0.0227, "step": 1390 }, { "clip_ratio/high_max": 0.0031019230737001635, "clip_ratio/high_mean": 0.001293247330977465, "clip_ratio/low_mean": 0.0016339628818968777, "clip_ratio/low_min": 0.00016817883260955568, "clip_ratio/region_mean": 0.002927210181951523, "epoch": 0.12982634210081143, "grad_norm": 0.11693202704191208, "learning_rate": 1e-06, "loss": 0.0645, "step": 1391 }, { "clip_ratio/high_max": 0.002797765177092515, "clip_ratio/high_mean": 0.0011576060387596954, "clip_ratio/low_mean": 0.0011010630914825015, "clip_ratio/low_min": 0.00010567695426288992, "clip_ratio/region_mean": 0.0022586691338801757, "epoch": 0.129919675200812, "grad_norm": 0.10736978054046631, "learning_rate": 1e-06, "loss": 0.0314, "step": 1392 }, { "clip_ratio/high_max": 0.002558309912274126, "clip_ratio/high_mean": 0.0011910736157005886, "clip_ratio/low_mean": 0.0012524053217930486, "clip_ratio/low_min": 5.5202819567057304e-05, "clip_ratio/region_mean": 0.0024434789156657644, "epoch": 0.13001300830081258, "grad_norm": 0.12168847024440765, "learning_rate": 1e-06, "loss": 0.0114, "step": 1393 }, { "clip_ratio/high_max": 0.002676270858501084, "clip_ratio/high_mean": 0.0011129300182801671, "clip_ratio/low_mean": 0.0015730028644611593, "clip_ratio/low_min": 0.00021488953279913403, "clip_ratio/region_mean": 0.002685932908207178, "epoch": 0.13010634140081317, "grad_norm": 0.13660931587219238, "learning_rate": 1e-06, "loss": 0.0573, "step": 1394 }, { "clip_ratio/high_max": 0.0026774827274493873, "clip_ratio/high_mean": 0.001188742447993718, "clip_ratio/low_mean": 0.0012503409379860386, "clip_ratio/low_min": 8.971944407676347e-05, "clip_ratio/region_mean": 0.002439083422359545, "epoch": 0.13019967450081374, "grad_norm": 0.10590992867946625, "learning_rate": 1e-06, "loss": 0.0004, "step": 1395 }, { "clip_ratio/high_max": 0.0027957746060565114, "clip_ratio/high_mean": 0.0012062489258823916, "clip_ratio/low_mean": 0.001335282306172303, "clip_ratio/low_min": 5.898510062252171e-05, "clip_ratio/region_mean": 0.002541531270253472, "epoch": 0.13029300760081433, "grad_norm": 0.10937392711639404, "learning_rate": 1e-06, "loss": 0.0769, "step": 1396 }, { "clip_ratio/high_max": 0.0025528517762722913, "clip_ratio/high_mean": 0.0011172767808602657, "clip_ratio/low_mean": 0.0010585999571048887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002175876754336059, "epoch": 0.13038634070081492, "grad_norm": 0.1044696792960167, "learning_rate": 1e-06, "loss": -0.0065, "step": 1397 }, { "clip_ratio/high_max": 0.0028716621491184924, "clip_ratio/high_mean": 0.0011923000347451307, "clip_ratio/low_mean": 0.001309829756792169, "clip_ratio/low_min": 8.934361176216044e-05, "clip_ratio/region_mean": 0.0025021297988132574, "epoch": 0.1304796738008155, "grad_norm": 0.11332632601261139, "learning_rate": 1e-06, "loss": 0.0237, "step": 1398 }, { "clip_ratio/high_max": 0.0028542759173433296, "clip_ratio/high_mean": 0.0012202570687804837, "clip_ratio/low_mean": 0.001396773132000817, "clip_ratio/low_min": 0.0001337286812486127, "clip_ratio/region_mean": 0.0026170302226091735, "epoch": 0.13057300690081608, "grad_norm": 0.13376747071743011, "learning_rate": 1e-06, "loss": 0.0681, "step": 1399 }, { "clip_ratio/high_max": 0.0029794814909109846, "clip_ratio/high_mean": 0.0011996722350886557, "clip_ratio/low_mean": 0.0012152929084550124, "clip_ratio/low_min": 6.035111073288135e-05, "clip_ratio/region_mean": 0.0024149651508196257, "epoch": 0.13066634000081667, "grad_norm": 0.10504157096147537, "learning_rate": 1e-06, "loss": 0.0247, "step": 1400 }, { "clip_ratio/high_max": 0.0029307726144907065, "clip_ratio/high_mean": 0.0012254597204446327, "clip_ratio/low_mean": 0.0013834683486493304, "clip_ratio/low_min": 0.00013271760235511465, "clip_ratio/region_mean": 0.002608928072731942, "epoch": 0.13075967310081724, "grad_norm": 0.10274302959442139, "learning_rate": 1e-06, "loss": 0.014, "step": 1401 }, { "clip_ratio/high_max": 0.003067772966460325, "clip_ratio/high_mean": 0.001243198294105241, "clip_ratio/low_mean": 0.0015190639351203572, "clip_ratio/low_min": 0.00016442992273368873, "clip_ratio/region_mean": 0.0027622622656053863, "epoch": 0.13085300620081783, "grad_norm": 0.1279619336128235, "learning_rate": 1e-06, "loss": 0.0397, "step": 1402 }, { "clip_ratio/high_max": 0.002966751955682412, "clip_ratio/high_mean": 0.001153536890342366, "clip_ratio/low_mean": 0.0013457574150379514, "clip_ratio/low_min": 4.862488731305348e-05, "clip_ratio/region_mean": 0.0024992943144752644, "epoch": 0.13094633930081842, "grad_norm": 0.11048737168312073, "learning_rate": 1e-06, "loss": 0.0449, "step": 1403 }, { "clip_ratio/high_max": 0.003070895698328968, "clip_ratio/high_mean": 0.0012803471254301257, "clip_ratio/low_mean": 0.0011865049600601196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024668520854902454, "epoch": 0.13103967240081899, "grad_norm": 0.12111610919237137, "learning_rate": 1e-06, "loss": -0.0102, "step": 1404 }, { "clip_ratio/high_max": 0.0026759948523249477, "clip_ratio/high_mean": 0.0010618627202347852, "clip_ratio/low_mean": 0.0013095025860820897, "clip_ratio/low_min": 3.4460575989214703e-05, "clip_ratio/region_mean": 0.0023713653499726206, "epoch": 0.13113300550081958, "grad_norm": 0.11323264986276627, "learning_rate": 1e-06, "loss": 0.0327, "step": 1405 }, { "clip_ratio/high_max": 0.0035132480843458325, "clip_ratio/high_mean": 0.0014271228756115306, "clip_ratio/low_mean": 0.0014637651584052946, "clip_ratio/low_min": 0.00010082941844302695, "clip_ratio/region_mean": 0.002890888041292783, "epoch": 0.13122633860082017, "grad_norm": 0.12540501356124878, "learning_rate": 1e-06, "loss": 0.0593, "step": 1406 }, { "clip_ratio/high_max": 0.00321574870031327, "clip_ratio/high_mean": 0.0012367431954771746, "clip_ratio/low_mean": 0.001250624565727776, "clip_ratio/low_min": 0.00011548271868377924, "clip_ratio/region_mean": 0.0024873677612049505, "epoch": 0.13131967170082076, "grad_norm": 0.11678268760442734, "learning_rate": 1e-06, "loss": 0.0024, "step": 1407 }, { "clip_ratio/high_max": 0.0026184236703556962, "clip_ratio/high_mean": 0.0011091383639723063, "clip_ratio/low_mean": 0.0014941797962819692, "clip_ratio/low_min": 0.00021953478790237568, "clip_ratio/region_mean": 0.002603318222099915, "epoch": 0.13141300480082133, "grad_norm": 0.1242443323135376, "learning_rate": 1e-06, "loss": 0.0732, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00946044921875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 607.646240234375, "completions/mean_terminated_length": 574.3297119140625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.13150633790082192, "grad_norm": 0.1468316614627838, "learning_rate": 1e-06, "loss": 0.0461, "num_tokens": 971711087.0, "reward": 0.6355940103530884, "reward_std": 0.17177210748195648, "rewards/simpleverify_reward/mean": 0.6355939507484436, "rewards/simpleverify_reward/std": 0.48126524686813354, "step": 1409 }, { "clip_ratio/high_max": 0.0022117213666206226, "clip_ratio/high_mean": 0.0009094587112485897, "clip_ratio/low_mean": 0.0004979919094694196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014074506143515464, "epoch": 0.1315996710008225, "grad_norm": 0.11225530505180359, "learning_rate": 1e-06, "loss": -0.0413, "step": 1410 }, { "clip_ratio/high_max": 0.0019903382162738126, "clip_ratio/high_mean": 0.0007690000547881937, "clip_ratio/low_mean": 0.0005090371278129169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012780371725966688, "epoch": 0.13169300410082307, "grad_norm": 0.1273522526025772, "learning_rate": 1e-06, "loss": -0.0041, "step": 1411 }, { "clip_ratio/high_max": 0.0022538252487720456, "clip_ratio/high_mean": 0.000918977599212667, "clip_ratio/low_mean": 0.0005195677422307199, "clip_ratio/low_min": 3.350308179506101e-05, "clip_ratio/region_mean": 0.0014385453359864186, "epoch": 0.13178633720082367, "grad_norm": 0.1204623281955719, "learning_rate": 1e-06, "loss": 0.0407, "step": 1412 }, { "clip_ratio/high_max": 0.00257959209557157, "clip_ratio/high_mean": 0.0009780170239537256, "clip_ratio/low_mean": 0.0005461268738145009, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015241438704833854, "epoch": 0.13187967030082426, "grad_norm": 0.12016411870718002, "learning_rate": 1e-06, "loss": 0.0234, "step": 1413 }, { "clip_ratio/high_max": 0.0020601605137926526, "clip_ratio/high_mean": 0.0008132116072374629, "clip_ratio/low_mean": 0.000520435108228412, "clip_ratio/low_min": 3.0433066967816558e-05, "clip_ratio/region_mean": 0.001333646718194359, "epoch": 0.13197300340082482, "grad_norm": 0.14665919542312622, "learning_rate": 1e-06, "loss": 0.0206, "step": 1414 }, { "clip_ratio/high_max": 0.0022333878878271207, "clip_ratio/high_mean": 0.0009283408799092285, "clip_ratio/low_mean": 0.0006677346827927977, "clip_ratio/low_min": 8.469847489323001e-05, "clip_ratio/region_mean": 0.0015960755918058567, "epoch": 0.13206633650082542, "grad_norm": 0.11612524837255478, "learning_rate": 1e-06, "loss": -0.0059, "step": 1415 }, { "clip_ratio/high_max": 0.0019573812824091874, "clip_ratio/high_mean": 0.000728739829355618, "clip_ratio/low_mean": 0.0008410110503973556, "clip_ratio/low_min": 7.067221031320514e-05, "clip_ratio/region_mean": 0.0015697508424636908, "epoch": 0.132159669600826, "grad_norm": 0.12085966765880585, "learning_rate": 1e-06, "loss": 0.0448, "step": 1416 }, { "clip_ratio/high_max": 0.0018550842796685174, "clip_ratio/high_mean": 0.0008167285559466109, "clip_ratio/low_mean": 0.0007674849348404678, "clip_ratio/low_min": 4.4999127567280084e-05, "clip_ratio/region_mean": 0.001584213474416174, "epoch": 0.13225300270082657, "grad_norm": 0.11525612324476242, "learning_rate": 1e-06, "loss": 0.0088, "step": 1417 }, { "clip_ratio/high_max": 0.0021248445118544623, "clip_ratio/high_mean": 0.0009030141845869366, "clip_ratio/low_mean": 0.0008802323900454212, "clip_ratio/low_min": 9.565523214405403e-05, "clip_ratio/region_mean": 0.0017832465600804426, "epoch": 0.13234633580082716, "grad_norm": 0.12166012078523636, "learning_rate": 1e-06, "loss": 0.0266, "step": 1418 }, { "clip_ratio/high_max": 0.0019606359201134183, "clip_ratio/high_mean": 0.0008451259836874669, "clip_ratio/low_mean": 0.0008642487882752903, "clip_ratio/low_min": 4.388017623568885e-05, "clip_ratio/region_mean": 0.0017093747446779162, "epoch": 0.13243966890082776, "grad_norm": 0.10902386903762817, "learning_rate": 1e-06, "loss": -0.0215, "step": 1419 }, { "clip_ratio/high_max": 0.002401111130893696, "clip_ratio/high_mean": 0.0009743445334606804, "clip_ratio/low_mean": 0.0009989414484152803, "clip_ratio/low_min": 0.00011687158257700503, "clip_ratio/region_mean": 0.001973285965505056, "epoch": 0.13253300200082832, "grad_norm": 0.12161514908075333, "learning_rate": 1e-06, "loss": 0.0177, "step": 1420 }, { "clip_ratio/high_max": 0.0022329996463668067, "clip_ratio/high_mean": 0.0009630080312490463, "clip_ratio/low_mean": 0.0009733226579555776, "clip_ratio/low_min": 1.4371119505085517e-05, "clip_ratio/region_mean": 0.001936330649186857, "epoch": 0.1326263351008289, "grad_norm": 0.1233842521905899, "learning_rate": 1e-06, "loss": 0.0047, "step": 1421 }, { "clip_ratio/high_max": 0.002122777914337348, "clip_ratio/high_mean": 0.0009246743356925435, "clip_ratio/low_mean": 0.0009864262956398306, "clip_ratio/low_min": 3.7579444324364886e-05, "clip_ratio/region_mean": 0.0019111006549792364, "epoch": 0.1327196682008295, "grad_norm": 0.12592491507530212, "learning_rate": 1e-06, "loss": 0.0466, "step": 1422 }, { "clip_ratio/high_max": 0.0025194279587594792, "clip_ratio/high_mean": 0.0009719051213323837, "clip_ratio/low_mean": 0.0009411266473762225, "clip_ratio/low_min": 1.1363636076566763e-05, "clip_ratio/region_mean": 0.0019130317741655745, "epoch": 0.13281300130083007, "grad_norm": 0.10780708491802216, "learning_rate": 1e-06, "loss": 0.0007, "step": 1423 }, { "clip_ratio/high_max": 0.002110450623149518, "clip_ratio/high_mean": 0.0009039778142323485, "clip_ratio/low_mean": 0.0008634897039883072, "clip_ratio/low_min": 3.06673209706787e-05, "clip_ratio/region_mean": 0.0017674674600129947, "epoch": 0.13290633440083066, "grad_norm": 0.1022372916340828, "learning_rate": 1e-06, "loss": 0.0045, "step": 1424 }, { "clip_ratio/high_max": 0.002416127477772534, "clip_ratio/high_mean": 0.0009341938730358379, "clip_ratio/low_mean": 0.000875459831149783, "clip_ratio/low_min": 0.00011759636254282668, "clip_ratio/region_mean": 0.001809653709642589, "epoch": 0.13299966750083125, "grad_norm": 0.10229035466909409, "learning_rate": 1e-06, "loss": 0.0153, "step": 1425 }, { "clip_ratio/high_max": 0.0023262972608790733, "clip_ratio/high_mean": 0.000938324024900794, "clip_ratio/low_mean": 0.0010679275219445117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002006251539569348, "epoch": 0.13309300060083182, "grad_norm": 0.11472427845001221, "learning_rate": 1e-06, "loss": 0.0285, "step": 1426 }, { "clip_ratio/high_max": 0.002210192436905345, "clip_ratio/high_mean": 0.0008469325894111535, "clip_ratio/low_mean": 0.0009266447996196803, "clip_ratio/low_min": 0.00010994085187121527, "clip_ratio/region_mean": 0.0017735773653839715, "epoch": 0.1331863337008324, "grad_norm": 0.11633387953042984, "learning_rate": 1e-06, "loss": 0.0626, "step": 1427 }, { "clip_ratio/high_max": 0.00241406295390334, "clip_ratio/high_mean": 0.0009293564216932282, "clip_ratio/low_mean": 0.0008625709851912688, "clip_ratio/low_min": 3.370303511474049e-05, "clip_ratio/region_mean": 0.0017919274541782215, "epoch": 0.133279666800833, "grad_norm": 0.10554017871618271, "learning_rate": 1e-06, "loss": -0.018, "step": 1428 }, { "clip_ratio/high_max": 0.002312235134013463, "clip_ratio/high_mean": 0.000986177175946068, "clip_ratio/low_mean": 0.000980331202299567, "clip_ratio/low_min": 6.621233842452057e-05, "clip_ratio/region_mean": 0.001966508374607656, "epoch": 0.1333729999008336, "grad_norm": 0.10500352829694748, "learning_rate": 1e-06, "loss": 0.0311, "step": 1429 }, { "clip_ratio/high_max": 0.0022750381322111934, "clip_ratio/high_mean": 0.000987979199635447, "clip_ratio/low_mean": 0.000954465547692962, "clip_ratio/low_min": 5.3228823162498884e-05, "clip_ratio/region_mean": 0.0019424447673372924, "epoch": 0.13346633300083416, "grad_norm": 0.12010016292333603, "learning_rate": 1e-06, "loss": -0.006, "step": 1430 }, { "clip_ratio/high_max": 0.0026293498958693817, "clip_ratio/high_mean": 0.001012986609566724, "clip_ratio/low_mean": 0.0010003645529650385, "clip_ratio/low_min": 0.00010023885806731414, "clip_ratio/region_mean": 0.002013351164350752, "epoch": 0.13355966610083475, "grad_norm": 0.22450974583625793, "learning_rate": 1e-06, "loss": -0.0008, "step": 1431 }, { "clip_ratio/high_max": 0.0023389422058244236, "clip_ratio/high_mean": 0.0010190101493208203, "clip_ratio/low_mean": 0.0010142018472834025, "clip_ratio/low_min": 7.786815240251599e-05, "clip_ratio/region_mean": 0.002033211989328265, "epoch": 0.13365299920083534, "grad_norm": 0.1232704147696495, "learning_rate": 1e-06, "loss": 0.0446, "step": 1432 }, { "clip_ratio/high_max": 0.002686832784092985, "clip_ratio/high_mean": 0.0010405555131001165, "clip_ratio/low_mean": 0.0011612515736487694, "clip_ratio/low_min": 5.750603577325819e-05, "clip_ratio/region_mean": 0.0022018070885678753, "epoch": 0.1337463323008359, "grad_norm": 0.11670401692390442, "learning_rate": 1e-06, "loss": 0.023, "step": 1433 }, { "clip_ratio/high_max": 0.0025915017977240495, "clip_ratio/high_mean": 0.0010605784591461997, "clip_ratio/low_mean": 0.0010589270295895403, "clip_ratio/low_min": 7.438726788677741e-05, "clip_ratio/region_mean": 0.002119505465088878, "epoch": 0.1338396654008365, "grad_norm": 0.1217108964920044, "learning_rate": 1e-06, "loss": 0.0312, "step": 1434 }, { "clip_ratio/high_max": 0.0023957593148224987, "clip_ratio/high_mean": 0.0008220075787903625, "clip_ratio/low_mean": 0.0010528042148507666, "clip_ratio/low_min": 3.618992559495382e-05, "clip_ratio/region_mean": 0.0018748117799987085, "epoch": 0.1339329985008371, "grad_norm": 0.10655663162469864, "learning_rate": 1e-06, "loss": 0.0313, "step": 1435 }, { "clip_ratio/high_max": 0.0024075108449324034, "clip_ratio/high_mean": 0.000968636930338107, "clip_ratio/low_mean": 0.0010758108910522424, "clip_ratio/low_min": 7.344112054852303e-05, "clip_ratio/region_mean": 0.0020444478213903494, "epoch": 0.13402633160083766, "grad_norm": 0.10565336793661118, "learning_rate": 1e-06, "loss": 0.0212, "step": 1436 }, { "clip_ratio/high_max": 0.0023795274828444235, "clip_ratio/high_mean": 0.0009553498221066548, "clip_ratio/low_mean": 0.0011769353768613655, "clip_ratio/low_min": 9.175698505714536e-05, "clip_ratio/region_mean": 0.002132285233528819, "epoch": 0.13411966470083825, "grad_norm": 0.11005101352930069, "learning_rate": 1e-06, "loss": 0.0308, "step": 1437 }, { "clip_ratio/high_max": 0.0025167810235871, "clip_ratio/high_mean": 0.0010111615592904855, "clip_ratio/low_mean": 0.0010541302799538244, "clip_ratio/low_min": 8.229420927818865e-05, "clip_ratio/region_mean": 0.002065291781036649, "epoch": 0.13421299780083884, "grad_norm": 0.11551311612129211, "learning_rate": 1e-06, "loss": 0.0004, "step": 1438 }, { "clip_ratio/high_max": 0.002602444546937477, "clip_ratio/high_mean": 0.0010254999961034628, "clip_ratio/low_mean": 0.001137688261223957, "clip_ratio/low_min": 1.6807853171485476e-05, "clip_ratio/region_mean": 0.002163188299164176, "epoch": 0.1343063309008394, "grad_norm": 0.11096356064081192, "learning_rate": 1e-06, "loss": 0.0092, "step": 1439 }, { "clip_ratio/high_max": 0.0026612578876665793, "clip_ratio/high_mean": 0.001025422296152101, "clip_ratio/low_mean": 0.0010505605787329841, "clip_ratio/low_min": 0.0001303464887314476, "clip_ratio/region_mean": 0.0020759829130838625, "epoch": 0.13439966400084, "grad_norm": 0.1314217746257782, "learning_rate": 1e-06, "loss": 0.0076, "step": 1440 }, { "clip_ratio/high_max": 0.002779732611088548, "clip_ratio/high_mean": 0.0010996875644195825, "clip_ratio/low_mean": 0.0011635952268989058, "clip_ratio/low_min": 0.00017972051682590973, "clip_ratio/region_mean": 0.0022632827967754565, "epoch": 0.1344929971008406, "grad_norm": 0.12370234727859497, "learning_rate": 1e-06, "loss": -0.0031, "step": 1441 }, { "clip_ratio/high_max": 0.0025649454910308123, "clip_ratio/high_mean": 0.0009810930150706554, "clip_ratio/low_mean": 0.0012442515981092583, "clip_ratio/low_min": 0.00017127721184806433, "clip_ratio/region_mean": 0.0022253446441027336, "epoch": 0.13458633020084115, "grad_norm": 0.11214728653430939, "learning_rate": 1e-06, "loss": 0.0249, "step": 1442 }, { "clip_ratio/high_max": 0.0024004285878618248, "clip_ratio/high_mean": 0.0009427154527656967, "clip_ratio/low_mean": 0.0010575019568932476, "clip_ratio/low_min": 1.5299878214136697e-05, "clip_ratio/region_mean": 0.0020002174060209654, "epoch": 0.13467966330084175, "grad_norm": 0.10256022959947586, "learning_rate": 1e-06, "loss": 0.0311, "step": 1443 }, { "clip_ratio/high_max": 0.0022697398671880364, "clip_ratio/high_mean": 0.0008995703392429277, "clip_ratio/low_mean": 0.0013505923016055021, "clip_ratio/low_min": 0.00016542463890800718, "clip_ratio/region_mean": 0.002250162622658536, "epoch": 0.13477299640084234, "grad_norm": 0.10748632997274399, "learning_rate": 1e-06, "loss": 0.0301, "step": 1444 }, { "clip_ratio/high_max": 0.0025347609735035803, "clip_ratio/high_mean": 0.0010447489330545068, "clip_ratio/low_mean": 0.0010094405279232888, "clip_ratio/low_min": 7.571061723865569e-05, "clip_ratio/region_mean": 0.0020541894700727426, "epoch": 0.1348663295008429, "grad_norm": 0.12649357318878174, "learning_rate": 1e-06, "loss": -0.0102, "step": 1445 }, { "clip_ratio/high_max": 0.002281164677697234, "clip_ratio/high_mean": 0.0009125149135797983, "clip_ratio/low_mean": 0.000942053011385724, "clip_ratio/low_min": 3.705899871420115e-05, "clip_ratio/region_mean": 0.0018545679340604693, "epoch": 0.1349596626008435, "grad_norm": 0.10778260976076126, "learning_rate": 1e-06, "loss": 0.0189, "step": 1446 }, { "clip_ratio/high_max": 0.0028327962281764485, "clip_ratio/high_mean": 0.0010599613597150892, "clip_ratio/low_mean": 0.0008716511892998824, "clip_ratio/low_min": 2.902926098613534e-05, "clip_ratio/region_mean": 0.0019316125544719398, "epoch": 0.1350529957008441, "grad_norm": 0.20406658947467804, "learning_rate": 1e-06, "loss": 0.0024, "step": 1447 }, { "clip_ratio/high_max": 0.002531581892981194, "clip_ratio/high_mean": 0.0010299961377313593, "clip_ratio/low_mean": 0.0010565808552200906, "clip_ratio/low_min": 8.090556821116479e-05, "clip_ratio/region_mean": 0.002086576983856503, "epoch": 0.13514632880084468, "grad_norm": 0.1103234514594078, "learning_rate": 1e-06, "loss": -0.0025, "step": 1448 }, { "clip_ratio/high_max": 0.0022429615928558633, "clip_ratio/high_mean": 0.0009579505167494062, "clip_ratio/low_mean": 0.0008745914383325726, "clip_ratio/low_min": 3.975093386543449e-05, "clip_ratio/region_mean": 0.0018325419077882543, "epoch": 0.13523966190084524, "grad_norm": 0.12768018245697021, "learning_rate": 1e-06, "loss": 0.0167, "step": 1449 }, { "clip_ratio/high_max": 0.0022056354937376454, "clip_ratio/high_mean": 0.0008714211489859736, "clip_ratio/low_mean": 0.0011531706077221315, "clip_ratio/low_min": 8.396076736971736e-05, "clip_ratio/region_mean": 0.002024591783992946, "epoch": 0.13533299500084583, "grad_norm": 0.10748114436864853, "learning_rate": 1e-06, "loss": 0.0463, "step": 1450 }, { "clip_ratio/high_max": 0.002638293561176397, "clip_ratio/high_mean": 0.0010356989405408967, "clip_ratio/low_mean": 0.0010219905525445938, "clip_ratio/low_min": 6.249376747291535e-05, "clip_ratio/region_mean": 0.0020576894530677237, "epoch": 0.13542632810084643, "grad_norm": 0.11492206901311874, "learning_rate": 1e-06, "loss": 0.0375, "step": 1451 }, { "clip_ratio/high_max": 0.0023746957813273184, "clip_ratio/high_mean": 0.0010006988632085267, "clip_ratio/low_mean": 0.0008632588123873575, "clip_ratio/low_min": 9.69443135545589e-06, "clip_ratio/region_mean": 0.0018639576956047677, "epoch": 0.135519661200847, "grad_norm": 0.10269959270954132, "learning_rate": 1e-06, "loss": -0.014, "step": 1452 }, { "clip_ratio/high_max": 0.0024743874309933744, "clip_ratio/high_mean": 0.0009452612412133021, "clip_ratio/low_mean": 0.001193292595417006, "clip_ratio/low_min": 0.00012857609362981748, "clip_ratio/region_mean": 0.002138553863915149, "epoch": 0.13561299430084758, "grad_norm": 0.12056542187929153, "learning_rate": 1e-06, "loss": 0.0745, "step": 1453 }, { "clip_ratio/high_max": 0.0022832017712062225, "clip_ratio/high_mean": 0.001005373411317123, "clip_ratio/low_mean": 0.0011942303499381524, "clip_ratio/low_min": 0.00011587541666813195, "clip_ratio/region_mean": 0.0021996038121869788, "epoch": 0.13570632740084818, "grad_norm": 0.1196768581867218, "learning_rate": 1e-06, "loss": 0.0431, "step": 1454 }, { "clip_ratio/high_max": 0.002098278739140369, "clip_ratio/high_mean": 0.0009257079072995111, "clip_ratio/low_mean": 0.0012094331978005357, "clip_ratio/low_min": 0.0001223248591486481, "clip_ratio/region_mean": 0.0021351410687202588, "epoch": 0.13579966050084874, "grad_norm": 0.11572129279375076, "learning_rate": 1e-06, "loss": 0.0511, "step": 1455 }, { "clip_ratio/high_max": 0.002783639749395661, "clip_ratio/high_mean": 0.001167096932476852, "clip_ratio/low_mean": 0.0011270478389633354, "clip_ratio/low_min": 1.1636566341621801e-05, "clip_ratio/region_mean": 0.002294144738698378, "epoch": 0.13589299360084933, "grad_norm": 0.12415316700935364, "learning_rate": 1e-06, "loss": 0.0077, "step": 1456 }, { "clip_ratio/high_max": 0.0024204809160437435, "clip_ratio/high_mean": 0.0009866714590316406, "clip_ratio/low_mean": 0.0012838316506531555, "clip_ratio/low_min": 5.947602039668709e-05, "clip_ratio/region_mean": 0.0022705030933138914, "epoch": 0.13598632670084992, "grad_norm": 0.11749020963907242, "learning_rate": 1e-06, "loss": 0.0451, "step": 1457 }, { "clip_ratio/high_max": 0.0023182419390650466, "clip_ratio/high_mean": 0.0009759142149050604, "clip_ratio/low_mean": 0.0013810494820063468, "clip_ratio/low_min": 0.00014673938130727038, "clip_ratio/region_mean": 0.0023569636905449443, "epoch": 0.1360796598008505, "grad_norm": 0.1164017990231514, "learning_rate": 1e-06, "loss": 0.0705, "step": 1458 }, { "clip_ratio/high_max": 0.002677210701222066, "clip_ratio/high_mean": 0.0011295473705104087, "clip_ratio/low_mean": 0.0009461676654609619, "clip_ratio/low_min": 5.197474092710763e-05, "clip_ratio/region_mean": 0.0020757149904966354, "epoch": 0.13617299290085108, "grad_norm": 0.11543598771095276, "learning_rate": 1e-06, "loss": 0.0161, "step": 1459 }, { "clip_ratio/high_max": 0.0027790833119070157, "clip_ratio/high_mean": 0.0011419239217502763, "clip_ratio/low_mean": 0.001205698001285782, "clip_ratio/low_min": 0.00013721889263251796, "clip_ratio/region_mean": 0.002347621863009408, "epoch": 0.13626632600085167, "grad_norm": 0.12189850211143494, "learning_rate": 1e-06, "loss": 0.0283, "step": 1460 }, { "clip_ratio/high_max": 0.0031833853718126193, "clip_ratio/high_mean": 0.0012375878359307535, "clip_ratio/low_mean": 0.0010633088550093817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023008967109490186, "epoch": 0.13635965910085224, "grad_norm": 0.14021095633506775, "learning_rate": 1e-06, "loss": -0.0025, "step": 1461 }, { "clip_ratio/high_max": 0.003089932339207735, "clip_ratio/high_mean": 0.0011884087252838071, "clip_ratio/low_mean": 0.001165329995274078, "clip_ratio/low_min": 7.758036554150749e-05, "clip_ratio/region_mean": 0.0023537386950920336, "epoch": 0.13645299220085283, "grad_norm": 0.1187484860420227, "learning_rate": 1e-06, "loss": 0.0065, "step": 1462 }, { "clip_ratio/high_max": 0.0027623161076917313, "clip_ratio/high_mean": 0.0011216348339075921, "clip_ratio/low_mean": 0.0011028113585780375, "clip_ratio/low_min": 2.3593809601152316e-05, "clip_ratio/region_mean": 0.0022244461797527038, "epoch": 0.13654632530085342, "grad_norm": 0.12906920909881592, "learning_rate": 1e-06, "loss": -0.0003, "step": 1463 }, { "clip_ratio/high_max": 0.0027788856968982145, "clip_ratio/high_mean": 0.0011064249811170157, "clip_ratio/low_mean": 0.0011159217938256916, "clip_ratio/low_min": 6.800706432841253e-05, "clip_ratio/region_mean": 0.002222346760390792, "epoch": 0.13663965840085399, "grad_norm": 0.11796051263809204, "learning_rate": 1e-06, "loss": 0.0246, "step": 1464 }, { "clip_ratio/high_max": 0.0029790025073452853, "clip_ratio/high_mean": 0.0011487963602121454, "clip_ratio/low_mean": 0.001116381197789451, "clip_ratio/low_min": 8.464285565423779e-05, "clip_ratio/region_mean": 0.0022651775580015965, "epoch": 0.13673299150085458, "grad_norm": 0.1100466251373291, "learning_rate": 1e-06, "loss": 0.0041, "step": 1465 }, { "clip_ratio/high_max": 0.002850481600034982, "clip_ratio/high_mean": 0.0013040837438893504, "clip_ratio/low_mean": 0.0009520533167233225, "clip_ratio/low_min": 3.74925002688542e-05, "clip_ratio/region_mean": 0.0022561370860785246, "epoch": 0.13682632460085517, "grad_norm": 0.33849474787712097, "learning_rate": 1e-06, "loss": -0.0064, "step": 1466 }, { "clip_ratio/high_max": 0.0026202483277302235, "clip_ratio/high_mean": 0.001034944554703543, "clip_ratio/low_mean": 0.0010917742984020151, "clip_ratio/low_min": 3.7937848901492544e-05, "clip_ratio/region_mean": 0.0021267188494675793, "epoch": 0.13691965770085573, "grad_norm": 0.11754457652568817, "learning_rate": 1e-06, "loss": 0.0232, "step": 1467 }, { "clip_ratio/high_max": 0.0027402813138905913, "clip_ratio/high_mean": 0.0010319926659576595, "clip_ratio/low_mean": 0.0012252390952198766, "clip_ratio/low_min": 0.0001071486112778075, "clip_ratio/region_mean": 0.0022572317320737056, "epoch": 0.13701299080085633, "grad_norm": 0.1115499809384346, "learning_rate": 1e-06, "loss": 0.0781, "step": 1468 }, { "clip_ratio/high_max": 0.0022908212267793715, "clip_ratio/high_mean": 0.0009174980950774625, "clip_ratio/low_mean": 0.0010252227693854365, "clip_ratio/low_min": 2.6415891625219956e-05, "clip_ratio/region_mean": 0.001942720904480666, "epoch": 0.13710632390085692, "grad_norm": 0.09845107793807983, "learning_rate": 1e-06, "loss": 0.036, "step": 1469 }, { "clip_ratio/high_max": 0.0025174013135256246, "clip_ratio/high_mean": 0.0010821056366694393, "clip_ratio/low_mean": 0.0010607309122860897, "clip_ratio/low_min": 5.6766577472444624e-05, "clip_ratio/region_mean": 0.002142836521670688, "epoch": 0.1371996570008575, "grad_norm": 0.11507207900285721, "learning_rate": 1e-06, "loss": 0.0527, "step": 1470 }, { "clip_ratio/high_max": 0.0028368353232508525, "clip_ratio/high_mean": 0.0010975784716720227, "clip_ratio/low_mean": 0.0011960793635807931, "clip_ratio/low_min": 4.760220690513961e-05, "clip_ratio/region_mean": 0.002293657817062922, "epoch": 0.13729299010085808, "grad_norm": 0.10873386263847351, "learning_rate": 1e-06, "loss": 0.0425, "step": 1471 }, { "clip_ratio/high_max": 0.0027898061234736815, "clip_ratio/high_mean": 0.001167829723272007, "clip_ratio/low_mean": 0.0011882341605087277, "clip_ratio/low_min": 1.1750328667403664e-05, "clip_ratio/region_mean": 0.002356063916522544, "epoch": 0.13738632320085867, "grad_norm": 0.2642475366592407, "learning_rate": 1e-06, "loss": 0.0214, "step": 1472 }, { "clip_ratio/high_max": 0.0027806581929326057, "clip_ratio/high_mean": 0.0010844345415534917, "clip_ratio/low_mean": 0.0010767722087621223, "clip_ratio/low_min": 3.2729547456256114e-05, "clip_ratio/region_mean": 0.00216120682307519, "epoch": 0.13747965630085926, "grad_norm": 0.11807902902364731, "learning_rate": 1e-06, "loss": 0.0583, "step": 1473 }, { "clip_ratio/high_max": 0.00230437903519487, "clip_ratio/high_mean": 0.0009912665336742066, "clip_ratio/low_mean": 0.0013206935072958004, "clip_ratio/low_min": 0.00017362347898597363, "clip_ratio/region_mean": 0.0023119600300560705, "epoch": 0.13757298940085982, "grad_norm": 0.10598843544721603, "learning_rate": 1e-06, "loss": 0.0528, "step": 1474 }, { "clip_ratio/high_max": 0.0026677254936657846, "clip_ratio/high_mean": 0.001104568629671121, "clip_ratio/low_mean": 0.0013112452252244111, "clip_ratio/low_min": 0.00015924924537102925, "clip_ratio/region_mean": 0.002415813876723405, "epoch": 0.13766632250086042, "grad_norm": 0.13652527332305908, "learning_rate": 1e-06, "loss": 0.0274, "step": 1475 }, { "clip_ratio/high_max": 0.002653229093994014, "clip_ratio/high_mean": 0.0010956492460536538, "clip_ratio/low_mean": 0.0011827794260170776, "clip_ratio/low_min": 2.0404831957421266e-05, "clip_ratio/region_mean": 0.0022784286556998268, "epoch": 0.137759655600861, "grad_norm": 0.14880774915218353, "learning_rate": 1e-06, "loss": 0.0736, "step": 1476 }, { "clip_ratio/high_max": 0.0027769236112362705, "clip_ratio/high_mean": 0.0012398618237057235, "clip_ratio/low_mean": 0.0010940074280370027, "clip_ratio/low_min": 6.257114364416339e-05, "clip_ratio/region_mean": 0.0023338692844845355, "epoch": 0.13785298870086157, "grad_norm": 0.12175989896059036, "learning_rate": 1e-06, "loss": 0.0544, "step": 1477 }, { "clip_ratio/high_max": 0.002621554202050902, "clip_ratio/high_mean": 0.0011173183247592533, "clip_ratio/low_mean": 0.0010841846651601372, "clip_ratio/low_min": 2.900824893004028e-05, "clip_ratio/region_mean": 0.00220150296081556, "epoch": 0.13794632180086216, "grad_norm": 0.11229871958494186, "learning_rate": 1e-06, "loss": 0.0081, "step": 1478 }, { "clip_ratio/high_max": 0.0025504994409857318, "clip_ratio/high_mean": 0.0010641595035849605, "clip_ratio/low_mean": 0.001272096731554484, "clip_ratio/low_min": 0.00011690111205098219, "clip_ratio/region_mean": 0.002336256300623063, "epoch": 0.13803965490086276, "grad_norm": 0.10911792516708374, "learning_rate": 1e-06, "loss": 0.0363, "step": 1479 }, { "clip_ratio/high_max": 0.0027662291686283424, "clip_ratio/high_mean": 0.0009855065327428747, "clip_ratio/low_mean": 0.0012573996973515023, "clip_ratio/low_min": 0.00013462905553751625, "clip_ratio/region_mean": 0.0022429062591982074, "epoch": 0.13813298800086332, "grad_norm": 0.11720386892557144, "learning_rate": 1e-06, "loss": 0.0873, "step": 1480 }, { "clip_ratio/high_max": 0.002455437512253411, "clip_ratio/high_mean": 0.0011465769639471546, "clip_ratio/low_mean": 0.0008920716918510152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020386486503412016, "epoch": 0.1382263211008639, "grad_norm": 0.10856595635414124, "learning_rate": 1e-06, "loss": -0.0525, "step": 1481 }, { "clip_ratio/high_max": 0.003157646337058395, "clip_ratio/high_mean": 0.0012697432248387486, "clip_ratio/low_mean": 0.0013415203011390986, "clip_ratio/low_min": 0.00017994404333876446, "clip_ratio/region_mean": 0.002611263498693006, "epoch": 0.1383196542008645, "grad_norm": 0.11082890629768372, "learning_rate": 1e-06, "loss": 0.0241, "step": 1482 }, { "clip_ratio/high_max": 0.002796586908516474, "clip_ratio/high_mean": 0.001134596750489436, "clip_ratio/low_mean": 0.0012173211980552878, "clip_ratio/low_min": 5.783602773590246e-05, "clip_ratio/region_mean": 0.0023519179740105756, "epoch": 0.13841298730086507, "grad_norm": 0.11337212473154068, "learning_rate": 1e-06, "loss": 0.0156, "step": 1483 }, { "clip_ratio/high_max": 0.002386477601248771, "clip_ratio/high_mean": 0.0011547439389687497, "clip_ratio/low_mean": 0.0011973719647357939, "clip_ratio/low_min": 5.4087871831143275e-05, "clip_ratio/region_mean": 0.002352115901885554, "epoch": 0.13850632040086566, "grad_norm": 0.12178612500429153, "learning_rate": 1e-06, "loss": -0.0184, "step": 1484 }, { "clip_ratio/high_max": 0.0028518163599073887, "clip_ratio/high_mean": 0.0011635217379080132, "clip_ratio/low_mean": 0.0015868476548348553, "clip_ratio/low_min": 9.053309167939005e-05, "clip_ratio/region_mean": 0.0027503694145707414, "epoch": 0.13859965350086625, "grad_norm": 0.8915780782699585, "learning_rate": 1e-06, "loss": 0.0271, "step": 1485 }, { "clip_ratio/high_max": 0.0028316673924564384, "clip_ratio/high_mean": 0.0012044939321640413, "clip_ratio/low_mean": 0.0013307863246154739, "clip_ratio/low_min": 6.622794171562418e-05, "clip_ratio/region_mean": 0.0025352802622364834, "epoch": 0.13869298660086682, "grad_norm": 0.12048197537660599, "learning_rate": 1e-06, "loss": 0.0169, "step": 1486 }, { "clip_ratio/high_max": 0.00242381803036551, "clip_ratio/high_mean": 0.0009514506655250443, "clip_ratio/low_mean": 0.0012668999952438753, "clip_ratio/low_min": 6.188119004946202e-05, "clip_ratio/region_mean": 0.0022183506589499302, "epoch": 0.1387863197008674, "grad_norm": 0.11273396760225296, "learning_rate": 1e-06, "loss": 0.0679, "step": 1487 }, { "clip_ratio/high_max": 0.002115559844241943, "clip_ratio/high_mean": 0.0009105337558139581, "clip_ratio/low_mean": 0.0012671837903326377, "clip_ratio/low_min": 6.36501745248097e-05, "clip_ratio/region_mean": 0.00217771756433649, "epoch": 0.138879652800868, "grad_norm": 0.11106456071138382, "learning_rate": 1e-06, "loss": 0.0883, "step": 1488 }, { "clip_ratio/high_max": 0.0032058905926533043, "clip_ratio/high_mean": 0.0013475653340719873, "clip_ratio/low_mean": 0.001286232351048966, "clip_ratio/low_min": 6.045325608283747e-05, "clip_ratio/region_mean": 0.0026337976596551016, "epoch": 0.13897298590086857, "grad_norm": 0.13243578374385834, "learning_rate": 1e-06, "loss": 0.0016, "step": 1489 }, { "clip_ratio/high_max": 0.0030218889587558806, "clip_ratio/high_mean": 0.0014174635944073088, "clip_ratio/low_mean": 0.0013734282765653916, "clip_ratio/low_min": 6.388055044226348e-05, "clip_ratio/region_mean": 0.0027908918709727004, "epoch": 0.13906631900086916, "grad_norm": 0.13378465175628662, "learning_rate": 1e-06, "loss": 0.0032, "step": 1490 }, { "clip_ratio/high_max": 0.002750161314907018, "clip_ratio/high_mean": 0.0012205466300656553, "clip_ratio/low_mean": 0.0012897806554974522, "clip_ratio/low_min": 3.115206618531374e-05, "clip_ratio/region_mean": 0.0025103273073909804, "epoch": 0.13915965210086975, "grad_norm": 0.1216592937707901, "learning_rate": 1e-06, "loss": 0.0075, "step": 1491 }, { "clip_ratio/high_max": 0.0034090596018359065, "clip_ratio/high_mean": 0.0013427475350908935, "clip_ratio/low_mean": 0.0013731910985370632, "clip_ratio/low_min": 0.00012055199294991326, "clip_ratio/region_mean": 0.0027159386372659355, "epoch": 0.13925298520087034, "grad_norm": 0.12644340097904205, "learning_rate": 1e-06, "loss": 0.035, "step": 1492 }, { "clip_ratio/high_max": 0.0028981095310882665, "clip_ratio/high_mean": 0.0012044994709867751, "clip_ratio/low_mean": 0.0014194349605531897, "clip_ratio/low_min": 7.9481867942377e-05, "clip_ratio/region_mean": 0.002623934495204594, "epoch": 0.1393463183008709, "grad_norm": 0.10122072696685791, "learning_rate": 1e-06, "loss": 0.0344, "step": 1493 }, { "clip_ratio/high_max": 0.0027495212052599527, "clip_ratio/high_mean": 0.0010657707225618651, "clip_ratio/low_mean": 0.0014343884904519655, "clip_ratio/low_min": 8.432873619312886e-05, "clip_ratio/region_mean": 0.0025001592221087776, "epoch": 0.1394396514008715, "grad_norm": 0.1175919622182846, "learning_rate": 1e-06, "loss": 0.0458, "step": 1494 }, { "clip_ratio/high_max": 0.003198013157089008, "clip_ratio/high_mean": 0.0012178584383946145, "clip_ratio/low_mean": 0.001467868009058293, "clip_ratio/low_min": 0.00019013058044947684, "clip_ratio/region_mean": 0.0026857264747377485, "epoch": 0.1395329845008721, "grad_norm": 0.11551834642887115, "learning_rate": 1e-06, "loss": 0.0376, "step": 1495 }, { "clip_ratio/high_max": 0.002495275213732384, "clip_ratio/high_mean": 0.001112371477574925, "clip_ratio/low_mean": 0.0016496615135110915, "clip_ratio/low_min": 0.0002636298904690193, "clip_ratio/region_mean": 0.002762032934697345, "epoch": 0.13962631760087266, "grad_norm": 0.12206330895423889, "learning_rate": 1e-06, "loss": 0.0396, "step": 1496 }, { "clip_ratio/high_max": 0.0031529958941973746, "clip_ratio/high_mean": 0.0012376550475892145, "clip_ratio/low_mean": 0.0014256401354941772, "clip_ratio/low_min": 0.00013022826624364825, "clip_ratio/region_mean": 0.0026632951630745083, "epoch": 0.13971965070087325, "grad_norm": 0.11085369437932968, "learning_rate": 1e-06, "loss": 0.0231, "step": 1497 }, { "clip_ratio/high_max": 0.00304369097284507, "clip_ratio/high_mean": 0.0012894755527668167, "clip_ratio/low_mean": 0.001411107596140937, "clip_ratio/low_min": 5.437935033114627e-05, "clip_ratio/region_mean": 0.0027005832016584463, "epoch": 0.13981298380087384, "grad_norm": 0.10914954543113708, "learning_rate": 1e-06, "loss": -0.024, "step": 1498 }, { "clip_ratio/high_max": 0.003024964840733446, "clip_ratio/high_mean": 0.0011884650120919105, "clip_ratio/low_mean": 0.0015281092637451366, "clip_ratio/low_min": 0.00015683392666687723, "clip_ratio/region_mean": 0.002716574199439492, "epoch": 0.1399063169008744, "grad_norm": 0.12459231168031693, "learning_rate": 1e-06, "loss": 0.0287, "step": 1499 }, { "clip_ratio/high_max": 0.0026434208630234934, "clip_ratio/high_mean": 0.0011657636187010212, "clip_ratio/low_mean": 0.0014828504899924155, "clip_ratio/low_min": 0.0002036403102465556, "clip_ratio/region_mean": 0.0026486140995984897, "epoch": 0.139999650000875, "grad_norm": 0.11421504616737366, "learning_rate": 1e-06, "loss": 0.0112, "step": 1500 }, { "clip_ratio/high_max": 0.002660350342921447, "clip_ratio/high_mean": 0.00120506559687783, "clip_ratio/low_mean": 0.0013747928351222072, "clip_ratio/low_min": 6.180469790706411e-05, "clip_ratio/region_mean": 0.0025798583519645035, "epoch": 0.1400929831008756, "grad_norm": 0.12463909387588501, "learning_rate": 1e-06, "loss": 0.0279, "step": 1501 }, { "clip_ratio/high_max": 0.0031790221473784186, "clip_ratio/high_mean": 0.0013923024052928668, "clip_ratio/low_mean": 0.0014092396559135523, "clip_ratio/low_min": 5.6706932809902355e-05, "clip_ratio/region_mean": 0.002801542097586207, "epoch": 0.14018631620087615, "grad_norm": 0.11176576465368271, "learning_rate": 1e-06, "loss": -0.008, "step": 1502 }, { "clip_ratio/high_max": 0.003155170845275279, "clip_ratio/high_mean": 0.0014255678615882061, "clip_ratio/low_mean": 0.0013273939566715853, "clip_ratio/low_min": 0.00010759520137071377, "clip_ratio/region_mean": 0.002752961801888887, "epoch": 0.14027964930087675, "grad_norm": 0.12618066370487213, "learning_rate": 1e-06, "loss": 0.0259, "step": 1503 }, { "clip_ratio/high_max": 0.0029176129610277712, "clip_ratio/high_mean": 0.0011191985668119742, "clip_ratio/low_mean": 0.0013986341982672457, "clip_ratio/low_min": 4.165278369328007e-05, "clip_ratio/region_mean": 0.0025178326832246967, "epoch": 0.14037298240087734, "grad_norm": 0.11602681875228882, "learning_rate": 1e-06, "loss": 0.0284, "step": 1504 }, { "clip_ratio/high_max": 0.0029687997594010085, "clip_ratio/high_mean": 0.0012583686147991102, "clip_ratio/low_mean": 0.0013929991746408632, "clip_ratio/low_min": 0.0001012433231153409, "clip_ratio/region_mean": 0.0026513678021728992, "epoch": 0.1404663155008779, "grad_norm": 0.11358543485403061, "learning_rate": 1e-06, "loss": 0.0325, "step": 1505 }, { "clip_ratio/high_max": 0.0026435050822328776, "clip_ratio/high_mean": 0.0010515982685319614, "clip_ratio/low_mean": 0.0015071502548380522, "clip_ratio/low_min": 0.00012342986701696645, "clip_ratio/region_mean": 0.0025587485943106003, "epoch": 0.1405596486008785, "grad_norm": 0.12584316730499268, "learning_rate": 1e-06, "loss": 0.0688, "step": 1506 }, { "clip_ratio/high_max": 0.002997233343194239, "clip_ratio/high_mean": 0.0011706749100994784, "clip_ratio/low_mean": 0.0015161802621150855, "clip_ratio/low_min": 4.1869564483931754e-05, "clip_ratio/region_mean": 0.002686855135834776, "epoch": 0.1406529817008791, "grad_norm": 0.11772178113460541, "learning_rate": 1e-06, "loss": 0.054, "step": 1507 }, { "clip_ratio/high_max": 0.003039415714738425, "clip_ratio/high_mean": 0.0013107438098813873, "clip_ratio/low_mean": 0.001529441764432704, "clip_ratio/low_min": 2.7337498067936394e-05, "clip_ratio/region_mean": 0.0028401855743140914, "epoch": 0.14074631480087965, "grad_norm": 0.6940681338310242, "learning_rate": 1e-06, "loss": 0.058, "step": 1508 }, { "clip_ratio/high_max": 0.002835014762240462, "clip_ratio/high_mean": 0.0011606671087065479, "clip_ratio/low_mean": 0.001262158461031504, "clip_ratio/low_min": 0.0001038056561810663, "clip_ratio/region_mean": 0.002422825520625338, "epoch": 0.14083964790088024, "grad_norm": 0.11285444349050522, "learning_rate": 1e-06, "loss": 0.0155, "step": 1509 }, { "clip_ratio/high_max": 0.0027010888152290136, "clip_ratio/high_mean": 0.001216887823829893, "clip_ratio/low_mean": 0.001354956802970264, "clip_ratio/low_min": 0.00010493374520592624, "clip_ratio/region_mean": 0.002571844575868454, "epoch": 0.14093298100088084, "grad_norm": 0.11101323366165161, "learning_rate": 1e-06, "loss": 0.025, "step": 1510 }, { "clip_ratio/high_max": 0.002534585633839015, "clip_ratio/high_mean": 0.0012337300031504128, "clip_ratio/low_mean": 0.0013232888722995995, "clip_ratio/low_min": 3.3068783523049206e-05, "clip_ratio/region_mean": 0.0025570188809069805, "epoch": 0.14102631410088143, "grad_norm": 0.11713607609272003, "learning_rate": 1e-06, "loss": 0.0082, "step": 1511 }, { "clip_ratio/high_max": 0.002500711430911906, "clip_ratio/high_mean": 0.0011643929647107143, "clip_ratio/low_mean": 0.0014246238661144162, "clip_ratio/low_min": 0.00018839373842638452, "clip_ratio/region_mean": 0.0025890168617479503, "epoch": 0.141119647200882, "grad_norm": 0.42848077416419983, "learning_rate": 1e-06, "loss": 0.0301, "step": 1512 }, { "clip_ratio/high_max": 0.0029574047221103683, "clip_ratio/high_mean": 0.0013003620006202254, "clip_ratio/low_mean": 0.0014682605542475358, "clip_ratio/low_min": 0.00015262598844856257, "clip_ratio/region_mean": 0.0027686225657816976, "epoch": 0.14121298030088258, "grad_norm": 0.11400165408849716, "learning_rate": 1e-06, "loss": 0.0648, "step": 1513 }, { "clip_ratio/high_max": 0.0031129845301620662, "clip_ratio/high_mean": 0.0014050981408217922, "clip_ratio/low_mean": 0.0014090836575633148, "clip_ratio/low_min": 3.0443254217971116e-05, "clip_ratio/region_mean": 0.0028141818183939904, "epoch": 0.14130631340088318, "grad_norm": 0.11875130981206894, "learning_rate": 1e-06, "loss": 0.0443, "step": 1514 }, { "clip_ratio/high_max": 0.0031790507564437576, "clip_ratio/high_mean": 0.001355299238639418, "clip_ratio/low_mean": 0.001307690268731676, "clip_ratio/low_min": 6.134942668722942e-05, "clip_ratio/region_mean": 0.002662989529198967, "epoch": 0.14139964650088374, "grad_norm": 0.11250169575214386, "learning_rate": 1e-06, "loss": -0.0037, "step": 1515 }, { "clip_ratio/high_max": 0.0031033246414153837, "clip_ratio/high_mean": 0.0014096756658545928, "clip_ratio/low_mean": 0.0014154239870549645, "clip_ratio/low_min": 0.00011577975965337828, "clip_ratio/region_mean": 0.0028250996620045044, "epoch": 0.14149297960088433, "grad_norm": 0.11189062893390656, "learning_rate": 1e-06, "loss": -0.0219, "step": 1516 }, { "clip_ratio/high_max": 0.00262328968528891, "clip_ratio/high_mean": 0.0012249394676473457, "clip_ratio/low_mean": 0.0014173396011756267, "clip_ratio/low_min": 2.3818596673663706e-05, "clip_ratio/region_mean": 0.0026422790833748877, "epoch": 0.14158631270088493, "grad_norm": 0.11323901265859604, "learning_rate": 1e-06, "loss": 0.0339, "step": 1517 }, { "clip_ratio/high_max": 0.0030602438637288287, "clip_ratio/high_mean": 0.0012743712250085082, "clip_ratio/low_mean": 0.001624144413653994, "clip_ratio/low_min": 0.00015085985160112614, "clip_ratio/region_mean": 0.0028985156532144174, "epoch": 0.1416796458008855, "grad_norm": 0.13926741480827332, "learning_rate": 1e-06, "loss": 0.0199, "step": 1518 }, { "clip_ratio/high_max": 0.0027332588215358555, "clip_ratio/high_mean": 0.0013018932040722575, "clip_ratio/low_mean": 0.0014374567617778666, "clip_ratio/low_min": 0.00010010834012064151, "clip_ratio/region_mean": 0.0027393499985919334, "epoch": 0.14177297890088608, "grad_norm": 0.11344989389181137, "learning_rate": 1e-06, "loss": 0.0196, "step": 1519 }, { "clip_ratio/high_max": 0.003331129366415553, "clip_ratio/high_mean": 0.001459608472941909, "clip_ratio/low_mean": 0.0014270490228227573, "clip_ratio/low_min": 6.051510354154743e-05, "clip_ratio/region_mean": 0.0028866574430139735, "epoch": 0.14186631200088667, "grad_norm": 1.6462842226028442, "learning_rate": 1e-06, "loss": -0.0064, "step": 1520 }, { "clip_ratio/high_max": 0.003179820880177431, "clip_ratio/high_mean": 0.0013420386057987344, "clip_ratio/low_mean": 0.001668410384809249, "clip_ratio/low_min": 4.8959946070681326e-05, "clip_ratio/region_mean": 0.0030104489414952695, "epoch": 0.14195964510088724, "grad_norm": 0.11637075245380402, "learning_rate": 1e-06, "loss": 0.0047, "step": 1521 }, { "clip_ratio/high_max": 0.003015102287463378, "clip_ratio/high_mean": 0.0012895968175143935, "clip_ratio/low_mean": 0.0015557365622953512, "clip_ratio/low_min": 4.170837564743124e-05, "clip_ratio/region_mean": 0.00284533339436166, "epoch": 0.14205297820088783, "grad_norm": 0.11883708834648132, "learning_rate": 1e-06, "loss": -0.0202, "step": 1522 }, { "clip_ratio/high_max": 0.003301380325865466, "clip_ratio/high_mean": 0.0013964535792183597, "clip_ratio/low_mean": 0.0017771646889741533, "clip_ratio/low_min": 3.9759411265549716e-05, "clip_ratio/region_mean": 0.0031736182572785765, "epoch": 0.14214631130088842, "grad_norm": 0.10774669796228409, "learning_rate": 1e-06, "loss": 0.0109, "step": 1523 }, { "clip_ratio/high_max": 0.00297357883391669, "clip_ratio/high_mean": 0.001272311325010378, "clip_ratio/low_mean": 0.0017494951680419035, "clip_ratio/low_min": 0.00015773228733451106, "clip_ratio/region_mean": 0.0030218064566724934, "epoch": 0.142239644400889, "grad_norm": 0.13024604320526123, "learning_rate": 1e-06, "loss": 0.0398, "step": 1524 }, { "clip_ratio/high_max": 0.0027003982759197243, "clip_ratio/high_mean": 0.0012124937584303552, "clip_ratio/low_mean": 0.0016560470467084087, "clip_ratio/low_min": 0.00012293097734072944, "clip_ratio/region_mean": 0.002868540868803393, "epoch": 0.14233297750088958, "grad_norm": 0.1243739202618599, "learning_rate": 1e-06, "loss": 0.0241, "step": 1525 }, { "clip_ratio/high_max": 0.003458211387624033, "clip_ratio/high_mean": 0.0013308976413100027, "clip_ratio/low_mean": 0.001378816396027105, "clip_ratio/low_min": 6.29810829195776e-05, "clip_ratio/region_mean": 0.0027097140919067897, "epoch": 0.14242631060089017, "grad_norm": 0.11025545746088028, "learning_rate": 1e-06, "loss": -0.0214, "step": 1526 }, { "clip_ratio/high_max": 0.0028820481966249645, "clip_ratio/high_mean": 0.0013134671607986093, "clip_ratio/low_mean": 0.0015541753709840123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028676425718003884, "epoch": 0.14251964370089074, "grad_norm": 0.12545356154441833, "learning_rate": 1e-06, "loss": 0.0145, "step": 1527 }, { "clip_ratio/high_max": 0.003086242730205413, "clip_ratio/high_mean": 0.0013463255054375622, "clip_ratio/low_mean": 0.0014875338129058946, "clip_ratio/low_min": 6.748548912582919e-05, "clip_ratio/region_mean": 0.0028338592528598383, "epoch": 0.14261297680089133, "grad_norm": 0.11443391442298889, "learning_rate": 1e-06, "loss": 0.0096, "step": 1528 }, { "clip_ratio/high_max": 0.0029393138611339964, "clip_ratio/high_mean": 0.0013462018177961, "clip_ratio/low_mean": 0.0016546018741792068, "clip_ratio/low_min": 3.793287578446325e-05, "clip_ratio/region_mean": 0.0030008036774233915, "epoch": 0.14270630990089192, "grad_norm": 0.10528197884559631, "learning_rate": 1e-06, "loss": 0.0022, "step": 1529 }, { "clip_ratio/high_max": 0.0029078684747219086, "clip_ratio/high_mean": 0.0013432889463729225, "clip_ratio/low_mean": 0.001652319660934154, "clip_ratio/low_min": 7.108331192284822e-05, "clip_ratio/region_mean": 0.002995608600031119, "epoch": 0.14279964300089248, "grad_norm": 0.11933331936597824, "learning_rate": 1e-06, "loss": 0.0293, "step": 1530 }, { "clip_ratio/high_max": 0.0028402969110175036, "clip_ratio/high_mean": 0.0011570206261239946, "clip_ratio/low_mean": 0.0016705019297660328, "clip_ratio/low_min": 7.741398167127045e-05, "clip_ratio/region_mean": 0.0028275225777179003, "epoch": 0.14289297610089308, "grad_norm": 0.11140989512205124, "learning_rate": 1e-06, "loss": 0.0589, "step": 1531 }, { "clip_ratio/high_max": 0.002941440950962715, "clip_ratio/high_mean": 0.001299613471928751, "clip_ratio/low_mean": 0.0015287725800590124, "clip_ratio/low_min": 0.00011934255780943204, "clip_ratio/region_mean": 0.002828386081091594, "epoch": 0.14298630920089367, "grad_norm": 0.11770542711019516, "learning_rate": 1e-06, "loss": 0.0069, "step": 1532 }, { "clip_ratio/high_max": 0.0032856551406439394, "clip_ratio/high_mean": 0.001400948309310479, "clip_ratio/low_mean": 0.001484631298808381, "clip_ratio/low_min": 7.879864460846875e-05, "clip_ratio/region_mean": 0.0028855796408606693, "epoch": 0.14307964230089426, "grad_norm": 0.12146227061748505, "learning_rate": 1e-06, "loss": -0.0236, "step": 1533 }, { "clip_ratio/high_max": 0.003145114409562666, "clip_ratio/high_mean": 0.0012771767396770883, "clip_ratio/low_mean": 0.0015248369018081576, "clip_ratio/low_min": 0.00014017606190463994, "clip_ratio/region_mean": 0.0028020136669510975, "epoch": 0.14317297540089483, "grad_norm": 0.11149397492408752, "learning_rate": 1e-06, "loss": 0.0174, "step": 1534 }, { "clip_ratio/high_max": 0.003133705395157449, "clip_ratio/high_mean": 0.0012910643526993226, "clip_ratio/low_mean": 0.0016779629986558575, "clip_ratio/low_min": 0.00010987078348989598, "clip_ratio/region_mean": 0.0029690272931475192, "epoch": 0.14326630850089542, "grad_norm": 0.11300347000360489, "learning_rate": 1e-06, "loss": 0.0376, "step": 1535 }, { "clip_ratio/high_max": 0.0029510759777622297, "clip_ratio/high_mean": 0.0013167555298423395, "clip_ratio/low_mean": 0.0017440295559936203, "clip_ratio/low_min": 0.0001590667961863801, "clip_ratio/region_mean": 0.003060785136767663, "epoch": 0.143359641600896, "grad_norm": 0.10898974537849426, "learning_rate": 1e-06, "loss": 0.0481, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008161272321428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 599.0726928710938, "completions/mean_terminated_length": 570.2984619140625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.14345297470089657, "grad_norm": 0.1331760734319687, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 1051773960.0, "reward": 0.6359427571296692, "reward_std": 0.16864867508411407, "rewards/simpleverify_reward/mean": 0.6359427571296692, "rewards/simpleverify_reward/std": 0.48116692900657654, "step": 1537 }, { "clip_ratio/high_max": 0.002019051789829973, "clip_ratio/high_mean": 0.0008634778732812265, "clip_ratio/low_mean": 0.0005345854478946421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001398063341184752, "epoch": 0.14354630780089717, "grad_norm": 0.1310259848833084, "learning_rate": 1e-06, "loss": 0.0097, "step": 1538 }, { "clip_ratio/high_max": 0.002067156157863792, "clip_ratio/high_mean": 0.0008867731594364159, "clip_ratio/low_mean": 0.0004512238047027495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001337996931397356, "epoch": 0.14363964090089776, "grad_norm": 0.11647213995456696, "learning_rate": 1e-06, "loss": 0.0193, "step": 1539 }, { "clip_ratio/high_max": 0.0020017999086121563, "clip_ratio/high_mean": 0.0007746222036075778, "clip_ratio/low_mean": 0.0005899308016523719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013645530125359073, "epoch": 0.14373297400089832, "grad_norm": 0.12436878681182861, "learning_rate": 1e-06, "loss": 0.0296, "step": 1540 }, { "clip_ratio/high_max": 0.0017152495729533257, "clip_ratio/high_mean": 0.0007533674361184239, "clip_ratio/low_mean": 0.0006592626541532809, "clip_ratio/low_min": 9.738719927554484e-05, "clip_ratio/region_mean": 0.0014126301211945247, "epoch": 0.14382630710089891, "grad_norm": 0.11987806856632233, "learning_rate": 1e-06, "loss": -0.0035, "step": 1541 }, { "clip_ratio/high_max": 0.002251584461191669, "clip_ratio/high_mean": 0.0008071958509390242, "clip_ratio/low_mean": 0.000674265605994151, "clip_ratio/low_min": 2.117567419190891e-05, "clip_ratio/region_mean": 0.0014814614187343977, "epoch": 0.1439196402008995, "grad_norm": 0.11759467422962189, "learning_rate": 1e-06, "loss": 0.0596, "step": 1542 }, { "clip_ratio/high_max": 0.0019849513773806393, "clip_ratio/high_mean": 0.000808235445219907, "clip_ratio/low_mean": 0.0006418463817681186, "clip_ratio/low_min": 4.139504562772345e-05, "clip_ratio/region_mean": 0.0014500818433589302, "epoch": 0.14401297330090007, "grad_norm": 0.11865795403718948, "learning_rate": 1e-06, "loss": 0.0076, "step": 1543 }, { "clip_ratio/high_max": 0.001998230756726116, "clip_ratio/high_mean": 0.0008913590736483457, "clip_ratio/low_mean": 0.0007811305358700338, "clip_ratio/low_min": 4.272562546248082e-05, "clip_ratio/region_mean": 0.0016724896267987788, "epoch": 0.14410630640090066, "grad_norm": 0.11927706003189087, "learning_rate": 1e-06, "loss": 0.0324, "step": 1544 }, { "clip_ratio/high_max": 0.001980333574465476, "clip_ratio/high_mean": 0.000781944812842994, "clip_ratio/low_mean": 0.0008639046627649805, "clip_ratio/low_min": 3.221677434339654e-05, "clip_ratio/region_mean": 0.0016458494719699956, "epoch": 0.14419963950090126, "grad_norm": 0.13412626087665558, "learning_rate": 1e-06, "loss": 0.0556, "step": 1545 }, { "clip_ratio/high_max": 0.0020835180912399665, "clip_ratio/high_mean": 0.0009011451438709628, "clip_ratio/low_mean": 0.0007664835884497734, "clip_ratio/low_min": 0.00010425875916553196, "clip_ratio/region_mean": 0.0016676287486916408, "epoch": 0.14429297260090182, "grad_norm": 0.12752968072891235, "learning_rate": 1e-06, "loss": 0.0426, "step": 1546 }, { "clip_ratio/high_max": 0.001979969398234971, "clip_ratio/high_mean": 0.0009074920399143593, "clip_ratio/low_mean": 0.000613282230006007, "clip_ratio/low_min": 1.2810001862817444e-05, "clip_ratio/region_mean": 0.0015207742399070412, "epoch": 0.1443863057009024, "grad_norm": 0.10908825695514679, "learning_rate": 1e-06, "loss": -0.019, "step": 1547 }, { "clip_ratio/high_max": 0.0022655078428215347, "clip_ratio/high_mean": 0.0008852544497131021, "clip_ratio/low_mean": 0.0008539426307834219, "clip_ratio/low_min": 2.2973717932472937e-05, "clip_ratio/region_mean": 0.0017391971305187326, "epoch": 0.144479638800903, "grad_norm": 0.12035182118415833, "learning_rate": 1e-06, "loss": 0.0247, "step": 1548 }, { "clip_ratio/high_max": 0.002204020962381037, "clip_ratio/high_mean": 0.0008539673654013313, "clip_ratio/low_mean": 0.0007013692666077986, "clip_ratio/low_min": 1.7346656022709794e-05, "clip_ratio/region_mean": 0.0015553366356471088, "epoch": 0.14457297190090357, "grad_norm": 0.11636511981487274, "learning_rate": 1e-06, "loss": 0.003, "step": 1549 }, { "clip_ratio/high_max": 0.0024815234864945523, "clip_ratio/high_mean": 0.0009739296692714561, "clip_ratio/low_mean": 0.0008585067253079615, "clip_ratio/low_min": 3.0005554435774684e-05, "clip_ratio/region_mean": 0.0018324364355066791, "epoch": 0.14466630500090416, "grad_norm": 1.255921721458435, "learning_rate": 1e-06, "loss": 0.0294, "step": 1550 }, { "clip_ratio/high_max": 0.0020831542933592573, "clip_ratio/high_mean": 0.0008278592958959052, "clip_ratio/low_mean": 0.0009873319413600257, "clip_ratio/low_min": 7.304474638658576e-05, "clip_ratio/region_mean": 0.001815191215428058, "epoch": 0.14475963810090475, "grad_norm": 0.1153787150979042, "learning_rate": 1e-06, "loss": 0.0611, "step": 1551 }, { "clip_ratio/high_max": 0.0023547829696326517, "clip_ratio/high_mean": 0.0009259260932594771, "clip_ratio/low_mean": 0.0009883973871183116, "clip_ratio/low_min": 3.264272345404606e-05, "clip_ratio/region_mean": 0.0019143234312650748, "epoch": 0.14485297120090532, "grad_norm": 0.11677554994821548, "learning_rate": 1e-06, "loss": 0.0272, "step": 1552 }, { "clip_ratio/high_max": 0.0025879938693833537, "clip_ratio/high_mean": 0.0010390332026872784, "clip_ratio/low_mean": 0.0009386335623275954, "clip_ratio/low_min": 5.804289139632601e-05, "clip_ratio/region_mean": 0.0019776667177211493, "epoch": 0.1449463043009059, "grad_norm": 0.11043479293584824, "learning_rate": 1e-06, "loss": -0.0186, "step": 1553 }, { "clip_ratio/high_max": 0.0024974236876005307, "clip_ratio/high_mean": 0.001022075357468566, "clip_ratio/low_mean": 0.0009316568211943377, "clip_ratio/low_min": 1.9118997443001717e-05, "clip_ratio/region_mean": 0.001953732149559073, "epoch": 0.1450396374009065, "grad_norm": 0.11594738811254501, "learning_rate": 1e-06, "loss": 0.0062, "step": 1554 }, { "clip_ratio/high_max": 0.00220486092439387, "clip_ratio/high_mean": 0.0009031249464896973, "clip_ratio/low_mean": 0.000964019394814386, "clip_ratio/low_min": 5.779587445431389e-05, "clip_ratio/region_mean": 0.0018671443249331787, "epoch": 0.1451329705009071, "grad_norm": 0.10877743363380432, "learning_rate": 1e-06, "loss": 0.0089, "step": 1555 }, { "clip_ratio/high_max": 0.0022923965079826303, "clip_ratio/high_mean": 0.0009772972498467425, "clip_ratio/low_mean": 0.0011835394798254129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021608367533190176, "epoch": 0.14522630360090766, "grad_norm": 0.12109901756048203, "learning_rate": 1e-06, "loss": 0.0377, "step": 1556 }, { "clip_ratio/high_max": 0.0022736464816262014, "clip_ratio/high_mean": 0.0009526156591164181, "clip_ratio/low_mean": 0.0010804358644236345, "clip_ratio/low_min": 0.00014354121685755672, "clip_ratio/region_mean": 0.002033051503531169, "epoch": 0.14531963670090825, "grad_norm": 0.1205814853310585, "learning_rate": 1e-06, "loss": 0.0353, "step": 1557 }, { "clip_ratio/high_max": 0.00256652897951426, "clip_ratio/high_mean": 0.0011049522545363288, "clip_ratio/low_mean": 0.000883828453879687, "clip_ratio/low_min": 6.348605529638007e-05, "clip_ratio/region_mean": 0.0019887807648046874, "epoch": 0.14541296980090884, "grad_norm": 0.11574000865221024, "learning_rate": 1e-06, "loss": -0.0104, "step": 1558 }, { "clip_ratio/high_max": 0.0021898317645536736, "clip_ratio/high_mean": 0.0008501024622091791, "clip_ratio/low_mean": 0.001046256918925792, "clip_ratio/low_min": 3.1107282666198444e-05, "clip_ratio/region_mean": 0.0018963593611260876, "epoch": 0.1455063029009094, "grad_norm": 0.11880280822515488, "learning_rate": 1e-06, "loss": 0.0554, "step": 1559 }, { "clip_ratio/high_max": 0.0027522937962203287, "clip_ratio/high_mean": 0.0010890533012570813, "clip_ratio/low_mean": 0.0009512917822576128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002040345105342567, "epoch": 0.14559963600091, "grad_norm": 0.10995512455701828, "learning_rate": 1e-06, "loss": -0.0025, "step": 1560 }, { "clip_ratio/high_max": 0.00233405118342489, "clip_ratio/high_mean": 0.0010532149444770766, "clip_ratio/low_mean": 0.001044001128320815, "clip_ratio/low_min": 8.496898772136774e-05, "clip_ratio/region_mean": 0.002097216041875072, "epoch": 0.1456929691009106, "grad_norm": 0.11573732644319534, "learning_rate": 1e-06, "loss": 0.0233, "step": 1561 }, { "clip_ratio/high_max": 0.002588451803603675, "clip_ratio/high_mean": 0.0010720247009885497, "clip_ratio/low_mean": 0.0007863260125304805, "clip_ratio/low_min": 3.9687422940914985e-05, "clip_ratio/region_mean": 0.0018583506607683375, "epoch": 0.14578630220091116, "grad_norm": 0.1103486642241478, "learning_rate": 1e-06, "loss": -0.0265, "step": 1562 }, { "clip_ratio/high_max": 0.0026435923427925445, "clip_ratio/high_mean": 0.0010283235696988413, "clip_ratio/low_mean": 0.0010669130824680906, "clip_ratio/low_min": 5.3719360948889516e-05, "clip_ratio/region_mean": 0.0020952366321580485, "epoch": 0.14587963530091175, "grad_norm": 0.10596740990877151, "learning_rate": 1e-06, "loss": 0.005, "step": 1563 }, { "clip_ratio/high_max": 0.0024243352381745353, "clip_ratio/high_mean": 0.0010283890787832206, "clip_ratio/low_mean": 0.0011610231995291542, "clip_ratio/low_min": 3.0162674192979466e-05, "clip_ratio/region_mean": 0.0021894123128731735, "epoch": 0.14597296840091234, "grad_norm": 0.1223297044634819, "learning_rate": 1e-06, "loss": 0.0313, "step": 1564 }, { "clip_ratio/high_max": 0.00273451762041077, "clip_ratio/high_mean": 0.0011236479658691678, "clip_ratio/low_mean": 0.001045834957039915, "clip_ratio/low_min": 3.8516938730026595e-05, "clip_ratio/region_mean": 0.0021694829265470617, "epoch": 0.1460663015009129, "grad_norm": 0.12946964800357819, "learning_rate": 1e-06, "loss": 0.0215, "step": 1565 }, { "clip_ratio/high_max": 0.0027597406842687633, "clip_ratio/high_mean": 0.0010764291237137513, "clip_ratio/low_mean": 0.001110309938667342, "clip_ratio/low_min": 0.00017597440455574542, "clip_ratio/region_mean": 0.002186739075114019, "epoch": 0.1461596346009135, "grad_norm": 0.11915992945432663, "learning_rate": 1e-06, "loss": 0.029, "step": 1566 }, { "clip_ratio/high_max": 0.0024903367775550578, "clip_ratio/high_mean": 0.0010254174158035312, "clip_ratio/low_mean": 0.0011325118230161024, "clip_ratio/low_min": 0.00012716423225356266, "clip_ratio/region_mean": 0.0021579292151727714, "epoch": 0.1462529677009141, "grad_norm": 0.1284736692905426, "learning_rate": 1e-06, "loss": 0.0367, "step": 1567 }, { "clip_ratio/high_max": 0.00270240713871317, "clip_ratio/high_mean": 0.0011996364228252787, "clip_ratio/low_mean": 0.0011255001809331588, "clip_ratio/low_min": 1.429551684850594e-05, "clip_ratio/region_mean": 0.0023251365855685435, "epoch": 0.14634630080091465, "grad_norm": 1.073857307434082, "learning_rate": 1e-06, "loss": 0.014, "step": 1568 }, { "clip_ratio/high_max": 0.0028473604943428654, "clip_ratio/high_mean": 0.0010889980458159698, "clip_ratio/low_mean": 0.00119545605048188, "clip_ratio/low_min": 2.9063590773148462e-05, "clip_ratio/region_mean": 0.0022844540944788605, "epoch": 0.14643963390091524, "grad_norm": 0.13501380383968353, "learning_rate": 1e-06, "loss": 0.0297, "step": 1569 }, { "clip_ratio/high_max": 0.0028598517674254254, "clip_ratio/high_mean": 0.0010944512741843937, "clip_ratio/low_mean": 0.0011131579412904102, "clip_ratio/low_min": 6.923250839463435e-05, "clip_ratio/region_mean": 0.0022076092354836874, "epoch": 0.14653296700091584, "grad_norm": 0.12409362196922302, "learning_rate": 1e-06, "loss": 0.0017, "step": 1570 }, { "clip_ratio/high_max": 0.0026152804057346657, "clip_ratio/high_mean": 0.0009437288281333167, "clip_ratio/low_mean": 0.001275744660233613, "clip_ratio/low_min": 2.681252772163134e-05, "clip_ratio/region_mean": 0.0022194734192453325, "epoch": 0.1466263001009164, "grad_norm": 0.12028136849403381, "learning_rate": 1e-06, "loss": 0.0456, "step": 1571 }, { "clip_ratio/high_max": 0.002573876212409232, "clip_ratio/high_mean": 0.0010177784697589232, "clip_ratio/low_mean": 0.0013646343140862882, "clip_ratio/low_min": 8.376202458748594e-05, "clip_ratio/region_mean": 0.002382412800216116, "epoch": 0.146719633200917, "grad_norm": 0.10730920732021332, "learning_rate": 1e-06, "loss": 0.0449, "step": 1572 }, { "clip_ratio/high_max": 0.00261613750262768, "clip_ratio/high_mean": 0.0009601320034562377, "clip_ratio/low_mean": 0.0012613291401066817, "clip_ratio/low_min": 0.0001273709367524134, "clip_ratio/region_mean": 0.0022214612326934002, "epoch": 0.14681296630091759, "grad_norm": 0.17470651865005493, "learning_rate": 1e-06, "loss": 0.0471, "step": 1573 }, { "clip_ratio/high_max": 0.002623839107400272, "clip_ratio/high_mean": 0.001100221215892816, "clip_ratio/low_mean": 0.0012966400936420541, "clip_ratio/low_min": 3.383255170774646e-05, "clip_ratio/region_mean": 0.002396861273155082, "epoch": 0.14690629940091818, "grad_norm": 0.19125966727733612, "learning_rate": 1e-06, "loss": 0.0391, "step": 1574 }, { "clip_ratio/high_max": 0.002946718923340086, "clip_ratio/high_mean": 0.001231655018273159, "clip_ratio/low_mean": 0.0010425556283735204, "clip_ratio/low_min": 5.0680047934292816e-05, "clip_ratio/region_mean": 0.0022742106884834357, "epoch": 0.14699963250091874, "grad_norm": 0.10879350453615189, "learning_rate": 1e-06, "loss": -0.0079, "step": 1575 }, { "clip_ratio/high_max": 0.002816996639012359, "clip_ratio/high_mean": 0.001196005232486641, "clip_ratio/low_mean": 0.001053724448865978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002249729681352619, "epoch": 0.14709296560091933, "grad_norm": 0.11205597221851349, "learning_rate": 1e-06, "loss": 0.0073, "step": 1576 }, { "clip_ratio/high_max": 0.002803973609843524, "clip_ratio/high_mean": 0.0010040341312560486, "clip_ratio/low_mean": 0.001451206517231185, "clip_ratio/low_min": 0.00021219081463641487, "clip_ratio/region_mean": 0.002455240646668244, "epoch": 0.14718629870091993, "grad_norm": 2.6633191108703613, "learning_rate": 1e-06, "loss": 0.0441, "step": 1577 }, { "clip_ratio/high_max": 0.002892303360567894, "clip_ratio/high_mean": 0.001038779544614954, "clip_ratio/low_mean": 0.0014200555888237432, "clip_ratio/low_min": 9.173279977403581e-05, "clip_ratio/region_mean": 0.002458835078869015, "epoch": 0.1472796318009205, "grad_norm": 0.12540794909000397, "learning_rate": 1e-06, "loss": 0.0469, "step": 1578 }, { "clip_ratio/high_max": 0.002852613200957421, "clip_ratio/high_mean": 0.0012918764005007688, "clip_ratio/low_mean": 0.0014276863330451306, "clip_ratio/low_min": 6.04595315962797e-05, "clip_ratio/region_mean": 0.0027195628063054755, "epoch": 0.14737296490092108, "grad_norm": 0.12613168358802795, "learning_rate": 1e-06, "loss": 0.011, "step": 1579 }, { "clip_ratio/high_max": 0.0028884717175969854, "clip_ratio/high_mean": 0.001152550190454349, "clip_ratio/low_mean": 0.0013026039814576507, "clip_ratio/low_min": 8.54961144796107e-05, "clip_ratio/region_mean": 0.0024551541719119996, "epoch": 0.14746629800092168, "grad_norm": 0.12145470827817917, "learning_rate": 1e-06, "loss": 0.0317, "step": 1580 }, { "clip_ratio/high_max": 0.0025187965293298475, "clip_ratio/high_mean": 0.001133369209128432, "clip_ratio/low_mean": 0.0013474826300807763, "clip_ratio/low_min": 2.9797378374496475e-05, "clip_ratio/region_mean": 0.0024808517991914414, "epoch": 0.14755963110092224, "grad_norm": 0.11006373167037964, "learning_rate": 1e-06, "loss": 0.0404, "step": 1581 }, { "clip_ratio/high_max": 0.0030235302328947, "clip_ratio/high_mean": 0.001170645406091353, "clip_ratio/low_mean": 0.0011911879755643895, "clip_ratio/low_min": 2.788933488773182e-05, "clip_ratio/region_mean": 0.0023618333871127106, "epoch": 0.14765296420092283, "grad_norm": 0.11489474028348923, "learning_rate": 1e-06, "loss": 0.0102, "step": 1582 }, { "clip_ratio/high_max": 0.002835865416273009, "clip_ratio/high_mean": 0.0011893027640326181, "clip_ratio/low_mean": 0.0014563824988726992, "clip_ratio/low_min": 8.87622209120309e-05, "clip_ratio/region_mean": 0.0026456852647243068, "epoch": 0.14774629730092342, "grad_norm": 0.12050473690032959, "learning_rate": 1e-06, "loss": 0.0667, "step": 1583 }, { "clip_ratio/high_max": 0.0025018454616656527, "clip_ratio/high_mean": 0.0011468420671008062, "clip_ratio/low_mean": 0.0014014012704137713, "clip_ratio/low_min": 4.745633850689046e-05, "clip_ratio/region_mean": 0.0025482433702563867, "epoch": 0.147839630400924, "grad_norm": 0.20857176184654236, "learning_rate": 1e-06, "loss": 0.033, "step": 1584 }, { "clip_ratio/high_max": 0.0030475231833406724, "clip_ratio/high_mean": 0.00126223027746164, "clip_ratio/low_mean": 0.0015846530368435197, "clip_ratio/low_min": 4.858142347075045e-05, "clip_ratio/region_mean": 0.0028468833334045485, "epoch": 0.14793296350092458, "grad_norm": 0.11923615634441376, "learning_rate": 1e-06, "loss": 0.0344, "step": 1585 }, { "clip_ratio/high_max": 0.002954526797111612, "clip_ratio/high_mean": 0.0013487739634001628, "clip_ratio/low_mean": 0.0013275169076223392, "clip_ratio/low_min": 1.7720441974233836e-05, "clip_ratio/region_mean": 0.0026762908601085655, "epoch": 0.14802629660092517, "grad_norm": 0.12191282957792282, "learning_rate": 1e-06, "loss": -0.0257, "step": 1586 }, { "clip_ratio/high_max": 0.0030190691468305886, "clip_ratio/high_mean": 0.0012583687785081565, "clip_ratio/low_mean": 0.0014010791674081702, "clip_ratio/low_min": 0.00010601799476717133, "clip_ratio/region_mean": 0.002659447920450475, "epoch": 0.14811962970092574, "grad_norm": 0.1292433738708496, "learning_rate": 1e-06, "loss": 0.0325, "step": 1587 }, { "clip_ratio/high_max": 0.0030810479365754873, "clip_ratio/high_mean": 0.0013153013860573992, "clip_ratio/low_mean": 0.0013788374053547159, "clip_ratio/low_min": 7.098281093931291e-05, "clip_ratio/region_mean": 0.0026941387695842423, "epoch": 0.14821296280092633, "grad_norm": 0.260358601808548, "learning_rate": 1e-06, "loss": -0.0008, "step": 1588 }, { "clip_ratio/high_max": 0.0031403375978698023, "clip_ratio/high_mean": 0.0012714104523183778, "clip_ratio/low_mean": 0.0013542760352720506, "clip_ratio/low_min": 0.00017211161502928007, "clip_ratio/region_mean": 0.0026256864075548947, "epoch": 0.14830629590092692, "grad_norm": 0.14988058805465698, "learning_rate": 1e-06, "loss": 0.0254, "step": 1589 }, { "clip_ratio/high_max": 0.0026779743857332505, "clip_ratio/high_mean": 0.0010788335021061357, "clip_ratio/low_mean": 0.0014734040614712285, "clip_ratio/low_min": 0.00010233341527055018, "clip_ratio/region_mean": 0.0025522375872242264, "epoch": 0.14839962900092749, "grad_norm": 0.19826769828796387, "learning_rate": 1e-06, "loss": 0.0431, "step": 1590 }, { "clip_ratio/high_max": 0.002470103612722596, "clip_ratio/high_mean": 0.0010616986564855324, "clip_ratio/low_mean": 0.0014063022936170455, "clip_ratio/low_min": 0.00016725757814128883, "clip_ratio/region_mean": 0.0024680008937139064, "epoch": 0.14849296210092808, "grad_norm": 0.10588473081588745, "learning_rate": 1e-06, "loss": 0.0298, "step": 1591 }, { "clip_ratio/high_max": 0.0029494402988348156, "clip_ratio/high_mean": 0.0011926215374842286, "clip_ratio/low_mean": 0.0016863905257196166, "clip_ratio/low_min": 0.00017769355599739356, "clip_ratio/region_mean": 0.0028790119977202266, "epoch": 0.14858629520092867, "grad_norm": 0.11281023174524307, "learning_rate": 1e-06, "loss": 0.044, "step": 1592 }, { "clip_ratio/high_max": 0.003037026028323453, "clip_ratio/high_mean": 0.001229690187756205, "clip_ratio/low_mean": 0.0015455428110726643, "clip_ratio/low_min": 6.604961163247935e-05, "clip_ratio/region_mean": 0.0027752329770009965, "epoch": 0.14867962830092923, "grad_norm": 0.11929027736186981, "learning_rate": 1e-06, "loss": 0.0411, "step": 1593 }, { "clip_ratio/high_max": 0.0026094395070686005, "clip_ratio/high_mean": 0.001062958699549199, "clip_ratio/low_mean": 0.001433082183211809, "clip_ratio/low_min": 0.00010813827611855231, "clip_ratio/region_mean": 0.0024960408845799975, "epoch": 0.14877296140092983, "grad_norm": 0.1162814125418663, "learning_rate": 1e-06, "loss": 0.0292, "step": 1594 }, { "clip_ratio/high_max": 0.0026943126213154756, "clip_ratio/high_mean": 0.0011308098873996641, "clip_ratio/low_mean": 0.0013149891819921322, "clip_ratio/low_min": 0.00010756266055977903, "clip_ratio/region_mean": 0.0024457990512019023, "epoch": 0.14886629450093042, "grad_norm": 0.11057198792695999, "learning_rate": 1e-06, "loss": -0.0308, "step": 1595 }, { "clip_ratio/high_max": 0.003469643968855962, "clip_ratio/high_mean": 0.00149761811553617, "clip_ratio/low_mean": 0.0012887150323876995, "clip_ratio/low_min": 0.00010086231759487418, "clip_ratio/region_mean": 0.0027863332361448556, "epoch": 0.148959627600931, "grad_norm": 0.116305872797966, "learning_rate": 1e-06, "loss": -0.0246, "step": 1596 }, { "clip_ratio/high_max": 0.003023018791282084, "clip_ratio/high_mean": 0.0011794157508120406, "clip_ratio/low_mean": 0.0016139480576384813, "clip_ratio/low_min": 6.956141623959411e-05, "clip_ratio/region_mean": 0.0027933637175010517, "epoch": 0.14905296070093157, "grad_norm": 0.12252958863973618, "learning_rate": 1e-06, "loss": 0.0765, "step": 1597 }, { "clip_ratio/high_max": 0.0026197668703389354, "clip_ratio/high_mean": 0.001151306332758395, "clip_ratio/low_mean": 0.0015905627988104243, "clip_ratio/low_min": 0.00012925142164021963, "clip_ratio/region_mean": 0.0027418691679486074, "epoch": 0.14914629380093217, "grad_norm": 0.14624157547950745, "learning_rate": 1e-06, "loss": 0.0328, "step": 1598 }, { "clip_ratio/high_max": 0.0031123743756324984, "clip_ratio/high_mean": 0.0012926149393024389, "clip_ratio/low_mean": 0.0014306021694210358, "clip_ratio/low_min": 0.0001934835672727786, "clip_ratio/region_mean": 0.002723217046877835, "epoch": 0.14923962690093276, "grad_norm": 0.1320226639509201, "learning_rate": 1e-06, "loss": 0.016, "step": 1599 }, { "clip_ratio/high_max": 0.0031263567216228694, "clip_ratio/high_mean": 0.0013776494306512177, "clip_ratio/low_mean": 0.0014489430614048615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028265925211599097, "epoch": 0.14933296000093332, "grad_norm": 0.12342894822359085, "learning_rate": 1e-06, "loss": 0.0017, "step": 1600 }, { "epoch": 0.14933296000093332, "step": 1600, "total_flos": 0.0, "train_loss": 0.004436979327128938, "train_runtime": 23038.4999, "train_samples_per_second": 62.226, "train_steps_per_second": 0.069 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 1051773960, "num_train_epochs": 1, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }