{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.73724117818606, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3643.0, "completions/mean_length": 587.203125, "completions/mean_terminated_length": 539.5724487304688, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.0023330417031204435, "grad_norm": 0.16698822379112244, "learning_rate": 1e-06, "loss": 0.005, "num_tokens": 563486.0, "reward": 0.527901828289032, "reward_std": 0.25697851181030273, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1 }, { "clip_ratio/high_max": 0.0022069819242460653, "clip_ratio/high_mean": 0.0010783750840346329, "clip_ratio/low_mean": 0.0006455714101321064, "clip_ratio/low_min": 5.8881477343675215e-05, "clip_ratio/region_mean": 0.001723946501442697, "epoch": 0.004666083406240887, "grad_norm": 0.1633935123682022, "learning_rate": 1e-06, "loss": 0.005, "step": 2 }, { "clip_ratio/high_max": 0.0028528971306513995, "clip_ratio/high_mean": 0.001173999782622559, "clip_ratio/low_mean": 0.0008453696245851461, "clip_ratio/low_min": 0.00010949714578600833, "clip_ratio/region_mean": 0.002019369370827917, "epoch": 0.00699912510936133, "grad_norm": 0.13955481350421906, "learning_rate": 1e-06, "loss": 0.0049, "step": 3 }, { "clip_ratio/high_max": 0.002438152863760479, "clip_ratio/high_mean": 0.0011322107275191229, "clip_ratio/low_mean": 0.0006964771737330011, "clip_ratio/low_min": 3.904342884197831e-05, "clip_ratio/region_mean": 0.0018286878912476823, "epoch": 0.009332166812481774, "grad_norm": 0.17257088422775269, "learning_rate": 1e-06, "loss": 0.005, "step": 4 }, { "clip_ratio/high_max": 0.0022956588290981017, "clip_ratio/high_mean": 0.0010180644603678957, "clip_ratio/low_mean": 0.0007858866883907467, "clip_ratio/low_min": 2.8750228011631407e-05, "clip_ratio/region_mean": 0.0018039511851384304, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3700.0, "completions/mean_length": 594.7767944335938, "completions/mean_terminated_length": 535.1646118164062, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.011665208515602217, "grad_norm": 0.11943060904741287, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 1122222.0, "reward": 0.5, "reward_std": 0.20932702720165253, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 5 }, { "clip_ratio/high_max": 0.0021394284267444164, "clip_ratio/high_mean": 0.000899278496945044, "clip_ratio/low_mean": 0.0005628358121612109, "clip_ratio/low_min": 2.8638078219955787e-05, "clip_ratio/region_mean": 0.0014621142763644457, "epoch": 0.01399825021872266, "grad_norm": 0.11965198069810867, "learning_rate": 1e-06, "loss": 0.0046, "step": 6 }, { "clip_ratio/high_max": 0.0021505132172023878, "clip_ratio/high_mean": 0.0008898037758626742, "clip_ratio/low_mean": 0.0005199446704864386, "clip_ratio/low_min": 1.4253135304898024e-05, "clip_ratio/region_mean": 0.0014097484418016393, "epoch": 0.016331291921843103, "grad_norm": 0.12519127130508423, "learning_rate": 1e-06, "loss": 0.0046, "step": 7 }, { "clip_ratio/high_max": 0.002451470816595247, "clip_ratio/high_mean": 0.0009645240534155164, "clip_ratio/low_mean": 0.0005826151964356541, "clip_ratio/low_min": 1.2815255104214884e-05, "clip_ratio/region_mean": 0.0015471392616746016, "epoch": 0.018664333624963548, "grad_norm": 0.12314146012067795, "learning_rate": 1e-06, "loss": 0.0045, "step": 8 }, { "clip_ratio/high_max": 0.0023484104895032942, "clip_ratio/high_mean": 0.001060373444488505, "clip_ratio/low_mean": 0.0005991220896248706, "clip_ratio/low_min": 4.0291668483405374e-05, "clip_ratio/region_mean": 0.0016594955814071, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 578.3449096679688, "completions/mean_terminated_length": 538.6422119140625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.02099737532808399, "grad_norm": 0.12650485336780548, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 1698003.0, "reward": 0.551339328289032, "reward_std": 0.22744254767894745, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 9 }, { "clip_ratio/high_max": 0.0027920536304009147, "clip_ratio/high_mean": 0.0012331056750554126, "clip_ratio/low_mean": 0.0008574621224397561, "clip_ratio/low_min": 5.941229301242856e-05, "clip_ratio/region_mean": 0.0020905678466078825, "epoch": 0.023330417031204434, "grad_norm": 0.1286631077528, "learning_rate": 1e-06, "loss": -0.0084, "step": 10 }, { "clip_ratio/high_max": 0.0026788903087435756, "clip_ratio/high_mean": 0.001218049834278645, "clip_ratio/low_mean": 0.0008356225189345423, "clip_ratio/low_min": 3.064634256588761e-05, "clip_ratio/region_mean": 0.002053672360489145, "epoch": 0.025663458734324875, "grad_norm": 0.12640273571014404, "learning_rate": 1e-06, "loss": -0.0084, "step": 11 }, { "clip_ratio/high_max": 0.002322336964425631, "clip_ratio/high_mean": 0.0010376910759077873, "clip_ratio/low_mean": 0.0007058258488541469, "clip_ratio/low_min": 3.341352567076683e-05, "clip_ratio/region_mean": 0.001743516928399913, "epoch": 0.02799650043744532, "grad_norm": 0.12750467658042908, "learning_rate": 1e-06, "loss": -0.0083, "step": 12 }, { "clip_ratio/high_max": 0.0020559522235998884, "clip_ratio/high_mean": 0.0008066238151513971, "clip_ratio/low_mean": 0.0005081693761894712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013147932186257094, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3344.0, "completions/mean_length": 552.9185791015625, "completions/mean_terminated_length": 516.9683837890625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.03032954214056576, "grad_norm": 0.12522836029529572, "learning_rate": 1e-06, "loss": 0.0153, "num_tokens": 2235986.0, "reward": 0.6037946939468384, "reward_std": 0.16871395707130432, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 13 }, { "clip_ratio/high_max": 0.0018420492524455767, "clip_ratio/high_mean": 0.0007820696209819289, "clip_ratio/low_mean": 0.0005368171496229479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013188867560529616, "epoch": 0.032662583843686206, "grad_norm": 0.12687888741493225, "learning_rate": 1e-06, "loss": 0.0153, "step": 14 }, { "clip_ratio/high_max": 0.0020066569559276104, "clip_ratio/high_mean": 0.0007651312698726542, "clip_ratio/low_mean": 0.0005698878376279026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013350191111385357, "epoch": 0.03499562554680665, "grad_norm": 0.12371513992547989, "learning_rate": 1e-06, "loss": 0.0152, "step": 15 }, { "clip_ratio/high_max": 0.00219004787504673, "clip_ratio/high_mean": 0.0008731490852369461, "clip_ratio/low_mean": 0.0005429762913990999, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001416125403920887, "epoch": 0.037328667249927096, "grad_norm": 0.12247077375650406, "learning_rate": 1e-06, "loss": 0.0152, "step": 16 }, { "clip_ratio/high_max": 0.002825789801136125, "clip_ratio/high_mean": 0.0009640934840717819, "clip_ratio/low_mean": 0.0007049099258438218, "clip_ratio/low_min": 2.2309477571980096e-05, "clip_ratio/region_mean": 0.0016690033953636885, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 585.8761596679688, "completions/mean_terminated_length": 550.2603759765625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.039661708953047534, "grad_norm": 0.1257213056087494, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 2807635.0, "reward": 0.5558035969734192, "reward_std": 0.2114010900259018, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 17 }, { "clip_ratio/high_max": 0.0022896416921867058, "clip_ratio/high_mean": 0.0008756098104640841, "clip_ratio/low_mean": 0.0006907273291290039, "clip_ratio/low_min": 3.0339918339450378e-05, "clip_ratio/region_mean": 0.001566337152326014, "epoch": 0.04199475065616798, "grad_norm": 0.12755708396434784, "learning_rate": 1e-06, "loss": 0.0098, "step": 18 }, { "clip_ratio/high_max": 0.0026625360042089596, "clip_ratio/high_mean": 0.0009646706348576117, "clip_ratio/low_mean": 0.0008121828068397008, "clip_ratio/low_min": 6.977430894039571e-05, "clip_ratio/region_mean": 0.0017768534162314609, "epoch": 0.04432779235928842, "grad_norm": 0.12369353324174881, "learning_rate": 1e-06, "loss": 0.0097, "step": 19 }, { "clip_ratio/high_max": 0.002680979043361731, "clip_ratio/high_mean": 0.0009425378666492179, "clip_ratio/low_mean": 0.0008169242337316973, "clip_ratio/low_min": 3.169920455547981e-05, "clip_ratio/region_mean": 0.0017594620658201165, "epoch": 0.04666083406240887, "grad_norm": 0.12699131667613983, "learning_rate": 1e-06, "loss": 0.0097, "step": 20 }, { "clip_ratio/high_max": 0.002220753289293498, "clip_ratio/high_mean": 0.0009175045743177179, "clip_ratio/low_mean": 0.0008161449168255785, "clip_ratio/low_min": 0.00013175159074307885, "clip_ratio/region_mean": 0.0017336494856863283, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 619.75, "completions/mean_terminated_length": 556.54541015625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.048993875765529306, "grad_norm": 0.13259917497634888, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 3387819.0, "reward": 0.5424107313156128, "reward_std": 0.23859378695487976, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 21 }, { "clip_ratio/high_max": 0.0023855977415223606, "clip_ratio/high_mean": 0.0009153922910627443, "clip_ratio/low_mean": 0.0007925679528852925, "clip_ratio/low_min": 4.4670691750070546e-05, "clip_ratio/region_mean": 0.0017079602475860156, "epoch": 0.05132691746864975, "grad_norm": 0.1317659616470337, "learning_rate": 1e-06, "loss": 0.0079, "step": 22 }, { "clip_ratio/high_max": 0.00248773412022274, "clip_ratio/high_mean": 0.001008262152026873, "clip_ratio/low_mean": 0.0008567912464059191, "clip_ratio/low_min": 5.655383756675292e-05, "clip_ratio/region_mean": 0.0018650533675099723, "epoch": 0.053659959171770195, "grad_norm": 0.12728235125541687, "learning_rate": 1e-06, "loss": 0.0077, "step": 23 }, { "clip_ratio/high_max": 0.0023714817289146595, "clip_ratio/high_mean": 0.0009403181011293782, "clip_ratio/low_mean": 0.0008030677654460305, "clip_ratio/low_min": 3.803046638495289e-05, "clip_ratio/region_mean": 0.0017433858338335995, "epoch": 0.05599300087489064, "grad_norm": 0.1325240582227707, "learning_rate": 1e-06, "loss": 0.0078, "step": 24 }, { "clip_ratio/high_max": 0.0022767197297071107, "clip_ratio/high_mean": 0.0008823110983939841, "clip_ratio/low_mean": 0.0007937570480862632, "clip_ratio/low_min": 1.3569257134804502e-05, "clip_ratio/region_mean": 0.0016760681282903533, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 560.372802734375, "completions/mean_terminated_length": 508.3193664550781, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.058326042578011085, "grad_norm": 0.12760208547115326, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 3935121.0, "reward": 0.5959821939468384, "reward_std": 0.20531155169010162, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 25 }, { "clip_ratio/high_max": 0.0021842393289261963, "clip_ratio/high_mean": 0.0008866135376592865, "clip_ratio/low_mean": 0.0007609765889355913, "clip_ratio/low_min": 3.3256137612625025e-05, "clip_ratio/region_mean": 0.0016475900920340791, "epoch": 0.06065908428113152, "grad_norm": 0.12752452492713928, "learning_rate": 1e-06, "loss": -0.0053, "step": 26 }, { "clip_ratio/high_max": 0.002527207587263547, "clip_ratio/high_mean": 0.0009436444752282114, "clip_ratio/low_mean": 0.0007622613429703051, "clip_ratio/low_min": 1.7156189642264508e-05, "clip_ratio/region_mean": 0.0017059057718142867, "epoch": 0.06299212598425197, "grad_norm": 0.12454290688037872, "learning_rate": 1e-06, "loss": -0.0053, "step": 27 }, { "clip_ratio/high_max": 0.002598469887743704, "clip_ratio/high_mean": 0.0009599390996299917, "clip_ratio/low_mean": 0.0007723805028945208, "clip_ratio/low_min": 1.7025333363562822e-05, "clip_ratio/region_mean": 0.0017323195716016926, "epoch": 0.06532516768737241, "grad_norm": 0.12763994932174683, "learning_rate": 1e-06, "loss": -0.0053, "step": 28 }, { "clip_ratio/high_max": 0.0020726699076476507, "clip_ratio/high_mean": 0.0007925759437057422, "clip_ratio/low_mean": 0.0005854478949913755, "clip_ratio/low_min": 3.8003969166311435e-05, "clip_ratio/region_mean": 0.00137802386234398, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3542.0, "completions/mean_length": 634.6217041015625, "completions/mean_terminated_length": 599.5005493164062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.06765820939049286, "grad_norm": 0.5456535816192627, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 4541270.0, "reward": 0.5580357313156128, "reward_std": 0.20008507370948792, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 29 }, { "clip_ratio/high_max": 0.0020458855615288485, "clip_ratio/high_mean": 0.0009045126698765671, "clip_ratio/low_mean": 0.000561854418265284, "clip_ratio/low_min": 2.638947989908047e-05, "clip_ratio/region_mean": 0.0014663670808658935, "epoch": 0.0699912510936133, "grad_norm": 0.11541905999183655, "learning_rate": 1e-06, "loss": 0.0011, "step": 30 }, { "clip_ratio/high_max": 0.0021187146121519618, "clip_ratio/high_mean": 0.0009244540615327423, "clip_ratio/low_mean": 0.0005486218560690759, "clip_ratio/low_min": 7.895751514297444e-05, "clip_ratio/region_mean": 0.001473075884860009, "epoch": 0.07232429279673375, "grad_norm": 0.1175270602107048, "learning_rate": 1e-06, "loss": 0.0011, "step": 31 }, { "clip_ratio/high_max": 0.0019736517933779396, "clip_ratio/high_mean": 0.0008241707964771194, "clip_ratio/low_mean": 0.0007038313087832648, "clip_ratio/low_min": 3.9848686355981044e-05, "clip_ratio/region_mean": 0.0015280021034413949, "epoch": 0.07465733449985419, "grad_norm": 0.1159655973315239, "learning_rate": 1e-06, "loss": 0.001, "step": 32 }, { "clip_ratio/high_max": 0.0025189921507262625, "clip_ratio/high_mean": 0.001001935266685905, "clip_ratio/low_mean": 0.0005562257865676656, "clip_ratio/low_min": 2.4099123947962653e-05, "clip_ratio/region_mean": 0.001558161064167507, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 690.3158569335938, "completions/mean_terminated_length": 580.455078125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.07699037620297462, "grad_norm": 0.12106721103191376, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 5134081.0, "reward": 0.5491071939468384, "reward_std": 0.19587527215480804, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 33 }, { "clip_ratio/high_max": 0.0023387170949717984, "clip_ratio/high_mean": 0.0009437463704671245, "clip_ratio/low_mean": 0.0005400157479016343, "clip_ratio/low_min": 5.05176794831641e-05, "clip_ratio/region_mean": 0.0014837621383776423, "epoch": 0.07932341790609507, "grad_norm": 0.12077457457780838, "learning_rate": 1e-06, "loss": 0.0059, "step": 34 }, { "clip_ratio/high_max": 0.0023844349198043346, "clip_ratio/high_mean": 0.0009738284497871064, "clip_ratio/low_mean": 0.000605464811087586, "clip_ratio/low_min": 2.3907311515358742e-05, "clip_ratio/region_mean": 0.0015792932535987347, "epoch": 0.08165645960921551, "grad_norm": 0.12228137254714966, "learning_rate": 1e-06, "loss": 0.0059, "step": 35 }, { "clip_ratio/high_max": 0.0022718248146702535, "clip_ratio/high_mean": 0.0010037171305157244, "clip_ratio/low_mean": 0.0006731703197146999, "clip_ratio/low_min": 3.3701478969305754e-05, "clip_ratio/region_mean": 0.0016768874629633501, "epoch": 0.08398950131233596, "grad_norm": 0.11798576265573502, "learning_rate": 1e-06, "loss": 0.0058, "step": 36 }, { "clip_ratio/high_max": 0.0019461075207800604, "clip_ratio/high_mean": 0.0009393774853379, "clip_ratio/low_mean": 0.0007070608717185678, "clip_ratio/low_min": 5.712762231269153e-05, "clip_ratio/region_mean": 0.0016464384170831181, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3490.0, "completions/mean_length": 603.1506958007812, "completions/mean_terminated_length": 571.68359375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.0863225430154564, "grad_norm": 0.12857714295387268, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 5736976.0, "reward": 0.527901828289032, "reward_std": 0.2249252200126648, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 37 }, { "clip_ratio/high_max": 0.002086070668156026, "clip_ratio/high_mean": 0.0008970495127869071, "clip_ratio/low_mean": 0.0008530912127753254, "clip_ratio/low_min": 0.0001104946359191672, "clip_ratio/region_mean": 0.0017501407128293067, "epoch": 0.08865558471857685, "grad_norm": 0.1285839080810547, "learning_rate": 1e-06, "loss": 0.0137, "step": 38 }, { "clip_ratio/high_max": 0.0022732102224836126, "clip_ratio/high_mean": 0.0010200744363828562, "clip_ratio/low_mean": 0.0007814313721610233, "clip_ratio/low_min": 6.999115794315003e-05, "clip_ratio/region_mean": 0.0018015058230957948, "epoch": 0.09098862642169729, "grad_norm": 0.12550336122512817, "learning_rate": 1e-06, "loss": 0.0136, "step": 39 }, { "clip_ratio/high_max": 0.002514178937417455, "clip_ratio/high_mean": 0.0010351967757742386, "clip_ratio/low_mean": 0.0008092996376944939, "clip_ratio/low_min": 3.585418289731024e-05, "clip_ratio/region_mean": 0.001844496415287722, "epoch": 0.09332166812481774, "grad_norm": 0.12510190904140472, "learning_rate": 1e-06, "loss": 0.0136, "step": 40 }, { "clip_ratio/high_max": 0.0019224399802624248, "clip_ratio/high_mean": 0.0007265598496815073, "clip_ratio/low_mean": 0.0004063377855345607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001132897610659711, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 648.1105346679688, "completions/mean_terminated_length": 581.427734375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.09565470982793818, "grad_norm": 0.1042192205786705, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 6332395.0, "reward": 0.5033482313156128, "reward_std": 0.15623900294303894, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 41 }, { "clip_ratio/high_max": 0.0018315915513085201, "clip_ratio/high_mean": 0.0006311913566605654, "clip_ratio/low_mean": 0.00046994193871796597, "clip_ratio/low_min": 1.407340732839657e-05, "clip_ratio/region_mean": 0.0011011332953785313, "epoch": 0.09798775153105861, "grad_norm": 0.10411492735147476, "learning_rate": 1e-06, "loss": 0.0015, "step": 42 }, { "clip_ratio/high_max": 0.001956779608008219, "clip_ratio/high_mean": 0.0006719230214002891, "clip_ratio/low_mean": 0.0005001431281925761, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011720661368599394, "epoch": 0.10032079323417906, "grad_norm": 0.104643315076828, "learning_rate": 1e-06, "loss": 0.0015, "step": 43 }, { "clip_ratio/high_max": 0.0019334753123985138, "clip_ratio/high_mean": 0.0006953563024580944, "clip_ratio/low_mean": 0.0004930834875267465, "clip_ratio/low_min": 2.7530476472747978e-05, "clip_ratio/region_mean": 0.0011884397972607985, "epoch": 0.1026538349372995, "grad_norm": 0.10284064710140228, "learning_rate": 1e-06, "loss": 0.0014, "step": 44 }, { "clip_ratio/high_max": 0.0022713656962878304, "clip_ratio/high_mean": 0.000851228012379579, "clip_ratio/low_mean": 0.0007121152375475504, "clip_ratio/low_min": 5.23063727086992e-05, "clip_ratio/region_mean": 0.0015633432631148025, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 639.6473388671875, "completions/mean_terminated_length": 556.69482421875, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.10498687664041995, "grad_norm": 0.11585110425949097, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 6921983.0, "reward": 0.5602678656578064, "reward_std": 0.18772776424884796, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 45 }, { "clip_ratio/high_max": 0.0020406849653227255, "clip_ratio/high_mean": 0.0007914970719866687, "clip_ratio/low_mean": 0.0006986417829466518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014901388494763523, "epoch": 0.10731991834354039, "grad_norm": 0.11702017486095428, "learning_rate": 1e-06, "loss": 0.0006, "step": 46 }, { "clip_ratio/high_max": 0.002258400112623349, "clip_ratio/high_mean": 0.0008443867245659931, "clip_ratio/low_mean": 0.0007599261289215065, "clip_ratio/low_min": 9.40942427405389e-05, "clip_ratio/region_mean": 0.0016043128925957717, "epoch": 0.10965296004666084, "grad_norm": 0.11548767238855362, "learning_rate": 1e-06, "loss": 0.0005, "step": 47 }, { "clip_ratio/high_max": 0.0023966401131474413, "clip_ratio/high_mean": 0.0008740313314774539, "clip_ratio/low_mean": 0.0007647992988495389, "clip_ratio/low_min": 4.015419108327478e-05, "clip_ratio/region_mean": 0.0016388305884902366, "epoch": 0.11198600174978128, "grad_norm": 0.11666729301214218, "learning_rate": 1e-06, "loss": 0.0005, "step": 48 }, { "clip_ratio/high_max": 0.0024396584485657513, "clip_ratio/high_mean": 0.0010844070930033922, "clip_ratio/low_mean": 0.0005507529103851994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016351599842892028, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4005.0, "completions/mean_length": 673.9185791015625, "completions/mean_terminated_length": 611.6988525390625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.11431904345290173, "grad_norm": 0.12249033153057098, "learning_rate": 1e-06, "loss": 0.0094, "num_tokens": 7548270.0, "reward": 0.5792410969734192, "reward_std": 0.21899265050888062, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 49 }, { "clip_ratio/high_max": 0.0023946298970258795, "clip_ratio/high_mean": 0.0010842641640920192, "clip_ratio/low_mean": 0.0006478311806858983, "clip_ratio/low_min": 3.348513200762682e-05, "clip_ratio/region_mean": 0.0017320953193120658, "epoch": 0.11665208515602217, "grad_norm": 0.11938592791557312, "learning_rate": 1e-06, "loss": 0.0093, "step": 50 }, { "clip_ratio/high_max": 0.002634725155076012, "clip_ratio/high_mean": 0.0011958351060457062, "clip_ratio/low_mean": 0.0006637123424297897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018595474612084217, "epoch": 0.1189851268591426, "grad_norm": 0.11797577887773514, "learning_rate": 1e-06, "loss": 0.0093, "step": 51 }, { "clip_ratio/high_max": 0.0026076720969285816, "clip_ratio/high_mean": 0.0012316755710344296, "clip_ratio/low_mean": 0.0006770574509573635, "clip_ratio/low_min": 3.88279149774462e-05, "clip_ratio/region_mean": 0.0019087330292677507, "epoch": 0.12131816856226305, "grad_norm": 0.1196659728884697, "learning_rate": 1e-06, "loss": 0.0092, "step": 52 }, { "clip_ratio/high_max": 0.002361176644626539, "clip_ratio/high_mean": 0.0009833683361648582, "clip_ratio/low_mean": 0.000514935546561901, "clip_ratio/low_min": 3.137060230073985e-05, "clip_ratio/region_mean": 0.0014983038599893916, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3116.0, "completions/mean_length": 611.7288208007812, "completions/mean_terminated_length": 540.2973022460938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.12365121026538349, "grad_norm": 0.12889464199543, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 8118763.0, "reward": 0.5647321939468384, "reward_std": 0.21717889606952667, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 53 }, { "clip_ratio/high_max": 0.002574211946921423, "clip_ratio/high_mean": 0.0010823394914041273, "clip_ratio/low_mean": 0.0006075831352063688, "clip_ratio/low_min": 2.941975435533095e-05, "clip_ratio/region_mean": 0.0016899226684472524, "epoch": 0.12598425196850394, "grad_norm": 0.1291535198688507, "learning_rate": 1e-06, "loss": -0.0047, "step": 54 }, { "clip_ratio/high_max": 0.0029201378565630876, "clip_ratio/high_mean": 0.0010911595054494683, "clip_ratio/low_mean": 0.0006981909027672373, "clip_ratio/low_min": 4.9519567255629227e-05, "clip_ratio/region_mean": 0.0017893504264065996, "epoch": 0.1283172936716244, "grad_norm": 0.12540559470653534, "learning_rate": 1e-06, "loss": -0.0048, "step": 55 }, { "clip_ratio/high_max": 0.0027021527857868932, "clip_ratio/high_mean": 0.0011816199439635966, "clip_ratio/low_mean": 0.0007257064462464768, "clip_ratio/low_min": 7.557811022707028e-05, "clip_ratio/region_mean": 0.0019073264120379463, "epoch": 0.13065033537474482, "grad_norm": 0.12559323012828827, "learning_rate": 1e-06, "loss": -0.0048, "step": 56 }, { "clip_ratio/high_max": 0.001480159473430831, "clip_ratio/high_mean": 0.0006684714226139477, "clip_ratio/low_mean": 0.0005100549988128478, "clip_ratio/low_min": 2.6736582185549196e-05, "clip_ratio/region_mean": 0.0011785264177888166, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 648.8248291015625, "completions/mean_terminated_length": 609.9176025390625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.13298337707786526, "grad_norm": 0.10933328419923782, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 8752614.0, "reward": 0.5691964626312256, "reward_std": 0.1777365505695343, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 57 }, { "clip_ratio/high_max": 0.0016812803442007862, "clip_ratio/high_mean": 0.0006870662928122329, "clip_ratio/low_mean": 0.0006017237737978576, "clip_ratio/low_min": 1.7380421923007816e-05, "clip_ratio/region_mean": 0.0012887900484201964, "epoch": 0.13531641878098571, "grad_norm": 0.10893791168928146, "learning_rate": 1e-06, "loss": 0.0091, "step": 58 }, { "clip_ratio/high_max": 0.0018531538225943223, "clip_ratio/high_mean": 0.0007629757419636007, "clip_ratio/low_mean": 0.0005963998628431, "clip_ratio/low_min": 1.0539629329286981e-05, "clip_ratio/region_mean": 0.0013593756229965948, "epoch": 0.13764946048410615, "grad_norm": 0.1082148477435112, "learning_rate": 1e-06, "loss": 0.009, "step": 59 }, { "clip_ratio/high_max": 0.0018241670331917703, "clip_ratio/high_mean": 0.00077517476711364, "clip_ratio/low_mean": 0.0006059930638002697, "clip_ratio/low_min": 2.255886010971153e-05, "clip_ratio/region_mean": 0.0013811678436468355, "epoch": 0.1399825021872266, "grad_norm": 0.10482211410999298, "learning_rate": 1e-06, "loss": 0.009, "step": 60 }, { "clip_ratio/high_max": 0.0019219535315642133, "clip_ratio/high_mean": 0.0007842094819352496, "clip_ratio/low_mean": 0.0006277976317505818, "clip_ratio/low_min": 5.6122129535651766e-05, "clip_ratio/region_mean": 0.0014120071355137043, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3789.0, "completions/mean_length": 679.9788208007812, "completions/mean_terminated_length": 597.9942626953125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.14231554389034703, "grad_norm": 0.12563250958919525, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 9367323.0, "reward": 0.5758928656578064, "reward_std": 0.20808032155036926, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 61 }, { "clip_ratio/high_max": 0.0020141978457104415, "clip_ratio/high_mean": 0.0007680553062527906, "clip_ratio/low_mean": 0.0006790035076846834, "clip_ratio/low_min": 2.1605384063150268e-05, "clip_ratio/region_mean": 0.001447058813937474, "epoch": 0.1446485855934675, "grad_norm": 0.12740936875343323, "learning_rate": 1e-06, "loss": 0.0009, "step": 62 }, { "clip_ratio/high_max": 0.0020804079176741652, "clip_ratio/high_mean": 0.0008442968573945109, "clip_ratio/low_mean": 0.0007259415087901289, "clip_ratio/low_min": 4.318687660997966e-05, "clip_ratio/region_mean": 0.0015702383825555444, "epoch": 0.14698162729658792, "grad_norm": 0.12095212191343307, "learning_rate": 1e-06, "loss": 0.0009, "step": 63 }, { "clip_ratio/high_max": 0.0021274119717418216, "clip_ratio/high_mean": 0.0008743122816667892, "clip_ratio/low_mean": 0.0007465376565960469, "clip_ratio/low_min": 2.293694797117496e-05, "clip_ratio/region_mean": 0.0016208499582717195, "epoch": 0.14931466899970838, "grad_norm": 0.12097689509391785, "learning_rate": 1e-06, "loss": 0.0008, "step": 64 }, { "clip_ratio/high_max": 0.0017721720396366436, "clip_ratio/high_mean": 0.0007279613582795719, "clip_ratio/low_mean": 0.0005792271008431271, "clip_ratio/low_min": 2.9184464438003488e-05, "clip_ratio/region_mean": 0.0013071884459350258, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2818.0, "completions/mean_length": 673.1350708007812, "completions/mean_terminated_length": 610.901123046875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.15164771070282881, "grad_norm": 0.11224057525396347, "learning_rate": 1e-06, "loss": 0.012, "num_tokens": 9990540.0, "reward": 0.5803571939468384, "reward_std": 0.183932363986969, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 65 }, { "clip_ratio/high_max": 0.002080517697322648, "clip_ratio/high_mean": 0.0007986391337908572, "clip_ratio/low_mean": 0.0005779896255262429, "clip_ratio/low_min": 9.075069101527333e-06, "clip_ratio/region_mean": 0.0013766287556791212, "epoch": 0.15398075240594924, "grad_norm": 0.1100904792547226, "learning_rate": 1e-06, "loss": 0.0119, "step": 66 }, { "clip_ratio/high_max": 0.0019112537047476508, "clip_ratio/high_mean": 0.0007441622838086914, "clip_ratio/low_mean": 0.0005799345890409313, "clip_ratio/low_min": 2.7225207304582e-05, "clip_ratio/region_mean": 0.0013240968764876015, "epoch": 0.1563137941090697, "grad_norm": 0.1066666916012764, "learning_rate": 1e-06, "loss": 0.0119, "step": 67 }, { "clip_ratio/high_max": 0.0018870589774451219, "clip_ratio/high_mean": 0.0007005038023635279, "clip_ratio/low_mean": 0.0006698665893054567, "clip_ratio/low_min": 3.5538897463993635e-05, "clip_ratio/region_mean": 0.0013703704171348363, "epoch": 0.15864683581219013, "grad_norm": 0.10753520578145981, "learning_rate": 1e-06, "loss": 0.0118, "step": 68 }, { "clip_ratio/high_max": 0.002255437269923277, "clip_ratio/high_mean": 0.00083244996767462, "clip_ratio/low_mean": 0.0006011598597979173, "clip_ratio/low_min": 1.5516385246883146e-05, "clip_ratio/region_mean": 0.0014336097883642651, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3977.0, "completions/mean_length": 744.029052734375, "completions/mean_terminated_length": 627.909912109375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.1609798775153106, "grad_norm": 0.12573407590389252, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 10615542.0, "reward": 0.5457589626312256, "reward_std": 0.21102090179920197, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 69 }, { "clip_ratio/high_max": 0.002383463692240184, "clip_ratio/high_mean": 0.0008910680335247889, "clip_ratio/low_mean": 0.0006108471789048053, "clip_ratio/low_min": 1.2209416127006989e-05, "clip_ratio/region_mean": 0.0015019151869637426, "epoch": 0.16331291921843102, "grad_norm": 0.12489623576402664, "learning_rate": 1e-06, "loss": 0.0007, "step": 70 }, { "clip_ratio/high_max": 0.0025675459182821214, "clip_ratio/high_mean": 0.000918596664632787, "clip_ratio/low_mean": 0.00065482436275488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001573421079228865, "epoch": 0.16564596092155148, "grad_norm": 0.1202917993068695, "learning_rate": 1e-06, "loss": 0.0006, "step": 71 }, { "clip_ratio/high_max": 0.00216635723154468, "clip_ratio/high_mean": 0.0008386127738049254, "clip_ratio/low_mean": 0.00079518149868818, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001633794261579169, "epoch": 0.1679790026246719, "grad_norm": 0.12023834884166718, "learning_rate": 1e-06, "loss": 0.0005, "step": 72 }, { "clip_ratio/high_max": 0.0022574255272047594, "clip_ratio/high_mean": 0.0010894977640418801, "clip_ratio/low_mean": 0.0006713761194987455, "clip_ratio/low_min": 2.466228852426866e-05, "clip_ratio/region_mean": 0.0017608738708076999, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3927.0, "completions/mean_length": 693.4330444335938, "completions/mean_terminated_length": 603.7892456054688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.17031204432779237, "grad_norm": 0.12920650839805603, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 11237106.0, "reward": 0.6462053656578064, "reward_std": 0.23270179331302643, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 73 }, { "clip_ratio/high_max": 0.0024814972712192684, "clip_ratio/high_mean": 0.0011090909101767465, "clip_ratio/low_mean": 0.0007159462802519556, "clip_ratio/low_min": 7.610722059325781e-05, "clip_ratio/region_mean": 0.0018250371795147657, "epoch": 0.1726450860309128, "grad_norm": 0.12518727779388428, "learning_rate": 1e-06, "loss": -0.0112, "step": 74 }, { "clip_ratio/high_max": 0.002484422017005272, "clip_ratio/high_mean": 0.0011301900376565754, "clip_ratio/low_mean": 0.000807268574135378, "clip_ratio/low_min": 7.067933984217234e-05, "clip_ratio/region_mean": 0.0019374586045159958, "epoch": 0.17497812773403323, "grad_norm": 0.12647196650505066, "learning_rate": 1e-06, "loss": -0.0112, "step": 75 }, { "clip_ratio/high_max": 0.002753743996436242, "clip_ratio/high_mean": 0.001169821829535067, "clip_ratio/low_mean": 0.0008253856267401716, "clip_ratio/low_min": 1.7566047972650267e-05, "clip_ratio/region_mean": 0.0019952075017499737, "epoch": 0.1773111694371537, "grad_norm": 0.12645022571086884, "learning_rate": 1e-06, "loss": -0.0113, "step": 76 }, { "clip_ratio/high_max": 0.0020938037196174264, "clip_ratio/high_mean": 0.0009612362973712152, "clip_ratio/low_mean": 0.0005711183875973802, "clip_ratio/low_min": 9.481189408688806e-06, "clip_ratio/region_mean": 0.0015323546904255636, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3164.0, "completions/mean_length": 631.7980346679688, "completions/mean_terminated_length": 588.7401123046875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.17964421114027412, "grad_norm": 0.1227927878499031, "learning_rate": 1e-06, "loss": 0.0081, "num_tokens": 11849309.0, "reward": 0.6573660969734192, "reward_std": 0.21256154775619507, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 77 }, { "clip_ratio/high_max": 0.0024740870139794424, "clip_ratio/high_mean": 0.0010596701467875391, "clip_ratio/low_mean": 0.0005538311197597068, "clip_ratio/low_min": 9.481189408688806e-06, "clip_ratio/region_mean": 0.00161350128473714, "epoch": 0.18197725284339458, "grad_norm": 0.12212949991226196, "learning_rate": 1e-06, "loss": 0.008, "step": 78 }, { "clip_ratio/high_max": 0.0024700295252841897, "clip_ratio/high_mean": 0.0012013887499051634, "clip_ratio/low_mean": 0.000628448409770499, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018298371360288002, "epoch": 0.184310294546515, "grad_norm": 0.11775192618370056, "learning_rate": 1e-06, "loss": 0.0079, "step": 79 }, { "clip_ratio/high_max": 0.002141594362910837, "clip_ratio/high_mean": 0.0010811852807819378, "clip_ratio/low_mean": 0.00062928111947258, "clip_ratio/low_min": 2.117208714480512e-05, "clip_ratio/region_mean": 0.0017104663857026026, "epoch": 0.18664333624963547, "grad_norm": 0.11759623885154724, "learning_rate": 1e-06, "loss": 0.0079, "step": 80 }, { "clip_ratio/high_max": 0.0020067997102160007, "clip_ratio/high_mean": 0.000659712626656983, "clip_ratio/low_mean": 0.0005423080428954563, "clip_ratio/low_min": 1.335755496256752e-05, "clip_ratio/region_mean": 0.0012020206668239553, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3609.0, "completions/mean_length": 675.0267944335938, "completions/mean_terminated_length": 576.8358154296875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.1889763779527559, "grad_norm": 0.12829092144966125, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 12433925.0, "reward": 0.5848214626312256, "reward_std": 0.17491792142391205, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 81 }, { "clip_ratio/high_max": 0.0022881202967255376, "clip_ratio/high_mean": 0.000796856433225912, "clip_ratio/low_mean": 0.0006352322379825637, "clip_ratio/low_min": 2.025603680522181e-05, "clip_ratio/region_mean": 0.00143208865847555, "epoch": 0.19130941965587636, "grad_norm": 0.12727873027324677, "learning_rate": 1e-06, "loss": 0.0076, "step": 82 }, { "clip_ratio/high_max": 0.002043675547611201, "clip_ratio/high_mean": 0.0007276658743649023, "clip_ratio/low_mean": 0.0006893798854434863, "clip_ratio/low_min": 5.2956676881876774e-05, "clip_ratio/region_mean": 0.0014170457579893991, "epoch": 0.1936424613589968, "grad_norm": 0.12437974661588669, "learning_rate": 1e-06, "loss": 0.0075, "step": 83 }, { "clip_ratio/high_max": 0.0022514698357554153, "clip_ratio/high_mean": 0.000807607255410403, "clip_ratio/low_mean": 0.0006569579963979777, "clip_ratio/low_min": 2.5267838282161392e-05, "clip_ratio/region_mean": 0.0014645652772742324, "epoch": 0.19597550306211722, "grad_norm": 0.12210484594106674, "learning_rate": 1e-06, "loss": 0.0075, "step": 84 }, { "clip_ratio/high_max": 0.0017875568446470425, "clip_ratio/high_mean": 0.0007749243031867081, "clip_ratio/low_mean": 0.0005387329920267803, "clip_ratio/low_min": 2.099582434311742e-05, "clip_ratio/region_mean": 0.0013136572815710679, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3865.0, "completions/mean_length": 751.896240234375, "completions/mean_terminated_length": 679.4469604492188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.19830854476523768, "grad_norm": 0.1347719430923462, "learning_rate": 1e-06, "loss": 0.0214, "num_tokens": 13125232.0, "reward": 0.5245535969734192, "reward_std": 0.2205638438463211, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 85 }, { "clip_ratio/high_max": 0.0017526951851323247, "clip_ratio/high_mean": 0.0008497237831761595, "clip_ratio/low_mean": 0.0006477980368799763, "clip_ratio/low_min": 8.36631515994668e-05, "clip_ratio/region_mean": 0.001497521843702998, "epoch": 0.2006415864683581, "grad_norm": 0.1237320825457573, "learning_rate": 1e-06, "loss": 0.0212, "step": 86 }, { "clip_ratio/high_max": 0.0019072150425927248, "clip_ratio/high_mean": 0.0008460909339191858, "clip_ratio/low_mean": 0.0006387746361724567, "clip_ratio/low_min": 3.4179878639406525e-05, "clip_ratio/region_mean": 0.0014848655882815365, "epoch": 0.20297462817147857, "grad_norm": 0.13195738196372986, "learning_rate": 1e-06, "loss": 0.0212, "step": 87 }, { "clip_ratio/high_max": 0.0020050143939442933, "clip_ratio/high_mean": 0.0008305341380037135, "clip_ratio/low_mean": 0.0006782394857509644, "clip_ratio/low_min": 3.965570886066416e-05, "clip_ratio/region_mean": 0.0015087736101122573, "epoch": 0.205307669874599, "grad_norm": 0.11794842034578323, "learning_rate": 1e-06, "loss": 0.0211, "step": 88 }, { "clip_ratio/high_max": 0.002177931266487576, "clip_ratio/high_mean": 0.000959723200139706, "clip_ratio/low_mean": 0.00042992017915821634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013896433811169118, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3866.0, "completions/mean_length": 701.4096069335938, "completions/mean_terminated_length": 631.816650390625, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.20764071157771946, "grad_norm": 0.1262165606021881, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 13764303.0, "reward": 0.6551339626312256, "reward_std": 0.19271966814994812, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 89 }, { "clip_ratio/high_max": 0.0021290587283147033, "clip_ratio/high_mean": 0.0008979748818092048, "clip_ratio/low_mean": 0.0005654924921145721, "clip_ratio/low_min": 1.236888965649996e-05, "clip_ratio/region_mean": 0.0014634673643740825, "epoch": 0.2099737532808399, "grad_norm": 0.11655435711145401, "learning_rate": 1e-06, "loss": 0.0009, "step": 90 }, { "clip_ratio/high_max": 0.0021737122515332885, "clip_ratio/high_mean": 0.001014966022921726, "clip_ratio/low_mean": 0.0005387476294345106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015537136641796678, "epoch": 0.21230679498396035, "grad_norm": 0.11609027534723282, "learning_rate": 1e-06, "loss": 0.0008, "step": 91 }, { "clip_ratio/high_max": 0.0021615583973471075, "clip_ratio/high_mean": 0.001014498886434012, "clip_ratio/low_mean": 0.0005886922863282962, "clip_ratio/low_min": 1.2252499800524674e-05, "clip_ratio/region_mean": 0.001603191121830605, "epoch": 0.21463983668708078, "grad_norm": 0.11582092940807343, "learning_rate": 1e-06, "loss": 0.0007, "step": 92 }, { "clip_ratio/high_max": 0.0022276021518337075, "clip_ratio/high_mean": 0.0008690342474437784, "clip_ratio/low_mean": 0.0004145386010350194, "clip_ratio/low_min": 1.4236901733966079e-05, "clip_ratio/region_mean": 0.0012835728703066707, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3672.0, "completions/mean_length": 757.8772583007812, "completions/mean_terminated_length": 650.1958618164062, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.2169728783902012, "grad_norm": 0.11606104671955109, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 14412969.0, "reward": 0.5334821939468384, "reward_std": 0.2027539610862732, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 93 }, { "clip_ratio/high_max": 0.001921585288073402, "clip_ratio/high_mean": 0.000804271679953672, "clip_ratio/low_mean": 0.0005507116366061382, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013549833347497042, "epoch": 0.21930592009332167, "grad_norm": 0.11132045090198517, "learning_rate": 1e-06, "loss": -0.0011, "step": 94 }, { "clip_ratio/high_max": 0.0020796068529307377, "clip_ratio/high_mean": 0.0009142383059952408, "clip_ratio/low_mean": 0.0005650026541843545, "clip_ratio/low_min": 1.552795038151089e-05, "clip_ratio/region_mean": 0.001479240909247892, "epoch": 0.2216389617964421, "grad_norm": 0.11098887771368027, "learning_rate": 1e-06, "loss": -0.0012, "step": 95 }, { "clip_ratio/high_max": 0.0020744604553328827, "clip_ratio/high_mean": 0.0009283198269258719, "clip_ratio/low_mean": 0.0005834558178321458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015117756483959965, "epoch": 0.22397200349956256, "grad_norm": 0.11064641177654266, "learning_rate": 1e-06, "loss": -0.0012, "step": 96 }, { "clip_ratio/high_max": 0.0018035905086435378, "clip_ratio/high_mean": 0.0006336834176181583, "clip_ratio/low_mean": 0.0004415467601575074, "clip_ratio/low_min": 1.5830801203264855e-05, "clip_ratio/region_mean": 0.001075230167771224, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3272.0, "completions/mean_length": 783.9542846679688, "completions/mean_terminated_length": 657.305908203125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.226305045202683, "grad_norm": 0.10582996904850006, "learning_rate": 1e-06, "loss": -0.0075, "num_tokens": 15065456.0, "reward": 0.5491071939468384, "reward_std": 0.16799576580524445, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 97 }, { "clip_ratio/high_max": 0.0018994301608472597, "clip_ratio/high_mean": 0.0006660413391728071, "clip_ratio/low_mean": 0.0005406120199040743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012066533636243548, "epoch": 0.22863808690580345, "grad_norm": 0.10346316546201706, "learning_rate": 1e-06, "loss": -0.0076, "step": 98 }, { "clip_ratio/high_max": 0.0019204455238650553, "clip_ratio/high_mean": 0.0007006494915913208, "clip_ratio/low_mean": 0.0005696131893273559, "clip_ratio/low_min": 1.9623234038590454e-05, "clip_ratio/region_mean": 0.0012702626991085708, "epoch": 0.23097112860892388, "grad_norm": 0.10351712256669998, "learning_rate": 1e-06, "loss": -0.0076, "step": 99 }, { "clip_ratio/high_max": 0.001992961682844907, "clip_ratio/high_mean": 0.0007579088505735854, "clip_ratio/low_mean": 0.0005693528673873516, "clip_ratio/low_min": 1.4357913642015774e-05, "clip_ratio/region_mean": 0.001327261696133064, "epoch": 0.23330417031204434, "grad_norm": 0.10089043527841568, "learning_rate": 1e-06, "loss": -0.0077, "step": 100 }, { "clip_ratio/high_max": 0.0020775325829163194, "clip_ratio/high_mean": 0.000898531961865956, "clip_ratio/low_mean": 0.0005592647294179187, "clip_ratio/low_min": 1.3478542314260267e-05, "clip_ratio/region_mean": 0.0014577966758224647, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2783.0, "completions/mean_length": 713.7980346679688, "completions/mean_terminated_length": 624.6907348632812, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.23563721201516477, "grad_norm": 0.12337063997983932, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 15698283.0, "reward": 0.515625, "reward_std": 0.208393394947052, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 101 }, { "clip_ratio/high_max": 0.0021313432771421503, "clip_ratio/high_mean": 0.0008682078159836237, "clip_ratio/low_mean": 0.0005763536755694076, "clip_ratio/low_min": 5.07823315274436e-05, "clip_ratio/region_mean": 0.0014445614397118334, "epoch": 0.2379702537182852, "grad_norm": 0.12123948335647583, "learning_rate": 1e-06, "loss": -0.003, "step": 102 }, { "clip_ratio/high_max": 0.0020499164384091273, "clip_ratio/high_mean": 0.0009460637502343161, "clip_ratio/low_mean": 0.0006619869213864149, "clip_ratio/low_min": 6.325949379970552e-05, "clip_ratio/region_mean": 0.00160805069754133, "epoch": 0.24030329542140566, "grad_norm": 0.12233118712902069, "learning_rate": 1e-06, "loss": -0.0031, "step": 103 }, { "clip_ratio/high_max": 0.002263504677102901, "clip_ratio/high_mean": 0.0010434493324282812, "clip_ratio/low_mean": 0.0006765964812984748, "clip_ratio/low_min": 3.5007469705305994e-05, "clip_ratio/region_mean": 0.0017200458387378603, "epoch": 0.2426363371245261, "grad_norm": 0.11790360510349274, "learning_rate": 1e-06, "loss": -0.0032, "step": 104 }, { "clip_ratio/high_max": 0.0018195305383414961, "clip_ratio/high_mean": 0.0007487867878808174, "clip_ratio/low_mean": 0.0006116460936027579, "clip_ratio/low_min": 4.229095429764129e-05, "clip_ratio/region_mean": 0.0013604328778455965, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 719.6551513671875, "completions/mean_terminated_length": 638.6228637695312, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.24496937882764655, "grad_norm": 0.12254764884710312, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 16345198.0, "reward": 0.5926339626312256, "reward_std": 0.1953539252281189, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 105 }, { "clip_ratio/high_max": 0.002051661896985024, "clip_ratio/high_mean": 0.0008401589839195367, "clip_ratio/low_mean": 0.0006876413135614712, "clip_ratio/low_min": 2.0885744561383035e-05, "clip_ratio/region_mean": 0.0015278002974810079, "epoch": 0.24730242053076698, "grad_norm": 0.11948426812887192, "learning_rate": 1e-06, "loss": 0.0061, "step": 106 }, { "clip_ratio/high_max": 0.002034528413787484, "clip_ratio/high_mean": 0.0008906658695195802, "clip_ratio/low_mean": 0.0006405252897820901, "clip_ratio/low_min": 3.906910933437757e-05, "clip_ratio/region_mean": 0.0015311911593016703, "epoch": 0.24963546223388744, "grad_norm": 0.11491391807794571, "learning_rate": 1e-06, "loss": 0.0061, "step": 107 }, { "clip_ratio/high_max": 0.0022034361900296062, "clip_ratio/high_mean": 0.000915008429728914, "clip_ratio/low_mean": 0.0007042651959636714, "clip_ratio/low_min": 5.243011764832772e-05, "clip_ratio/region_mean": 0.00161927367298631, "epoch": 0.25196850393700787, "grad_norm": 0.11283533275127411, "learning_rate": 1e-06, "loss": 0.0059, "step": 108 }, { "clip_ratio/high_max": 0.0017909595517267007, "clip_ratio/high_mean": 0.000803101447672816, "clip_ratio/low_mean": 0.0006673314010186004, "clip_ratio/low_min": 2.2123893359093927e-05, "clip_ratio/region_mean": 0.0014704328677908052, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 686.8683471679688, "completions/mean_terminated_length": 613.01025390625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.2543015456401283, "grad_norm": 0.12865006923675537, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 16972800.0, "reward": 0.5580357313156128, "reward_std": 0.208393394947052, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 109 }, { "clip_ratio/high_max": 0.0022757159131288063, "clip_ratio/high_mean": 0.0008571879370720126, "clip_ratio/low_mean": 0.0006255682192204404, "clip_ratio/low_min": 1.3086264516459778e-05, "clip_ratio/region_mean": 0.0014827561826677993, "epoch": 0.2566345873432488, "grad_norm": 0.1280517429113388, "learning_rate": 1e-06, "loss": 0.0051, "step": 110 }, { "clip_ratio/high_max": 0.002125467588484753, "clip_ratio/high_mean": 0.0009382402586197713, "clip_ratio/low_mean": 0.0007932517228255165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001731491975078825, "epoch": 0.2589676290463692, "grad_norm": 0.12392973154783249, "learning_rate": 1e-06, "loss": 0.005, "step": 111 }, { "clip_ratio/high_max": 0.002135084039764479, "clip_ratio/high_mean": 0.0009247274974768516, "clip_ratio/low_mean": 0.0008350851658178726, "clip_ratio/low_min": 3.177427424816415e-05, "clip_ratio/region_mean": 0.0017598127014935017, "epoch": 0.26130067074948965, "grad_norm": 0.12079077214002609, "learning_rate": 1e-06, "loss": 0.0049, "step": 112 }, { "clip_ratio/high_max": 0.002111727535520913, "clip_ratio/high_mean": 0.0008046593284234405, "clip_ratio/low_mean": 0.0006372523948812159, "clip_ratio/low_min": 4.736521623271983e-05, "clip_ratio/region_mean": 0.0014419117360375822, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 746.8292846679688, "completions/mean_terminated_length": 650.6991577148438, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.2636337124526101, "grad_norm": 0.13073855638504028, "learning_rate": 1e-06, "loss": -0.0151, "num_tokens": 17628543.0, "reward": 0.546875, "reward_std": 0.19355408847332, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 113 }, { "clip_ratio/high_max": 0.0023082344923750497, "clip_ratio/high_mean": 0.0008581400725233834, "clip_ratio/low_mean": 0.0007557807202829281, "clip_ratio/low_min": 5.032839544583112e-05, "clip_ratio/region_mean": 0.001613920772797428, "epoch": 0.2659667541557305, "grad_norm": 0.11874066293239594, "learning_rate": 1e-06, "loss": -0.0152, "step": 114 }, { "clip_ratio/high_max": 0.0023987352797121275, "clip_ratio/high_mean": 0.0008829241633065976, "clip_ratio/low_mean": 0.0007379411390502355, "clip_ratio/low_min": 6.181380194902886e-05, "clip_ratio/region_mean": 0.001620865317818243, "epoch": 0.268299795858851, "grad_norm": 0.12014667689800262, "learning_rate": 1e-06, "loss": -0.0153, "step": 115 }, { "clip_ratio/high_max": 0.0023658335558138788, "clip_ratio/high_mean": 0.0009125321139435982, "clip_ratio/low_mean": 0.0008032162932067877, "clip_ratio/low_min": 5.188590603211196e-05, "clip_ratio/region_mean": 0.0017157484144263435, "epoch": 0.27063283756197143, "grad_norm": 0.11507513374090195, "learning_rate": 1e-06, "loss": -0.0154, "step": 116 }, { "clip_ratio/high_max": 0.0018842575991584454, "clip_ratio/high_mean": 0.0007452981499227462, "clip_ratio/low_mean": 0.0006125343988969689, "clip_ratio/low_min": 1.9464341676211916e-05, "clip_ratio/region_mean": 0.0013578325379057787, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3563.0, "completions/mean_length": 668.2265625, "completions/mean_terminated_length": 597.9533081054688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.27296587926509186, "grad_norm": 0.12758295238018036, "learning_rate": 1e-06, "loss": 0.0085, "num_tokens": 18248874.0, "reward": 0.574776828289032, "reward_std": 0.1839316487312317, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 117 }, { "clip_ratio/high_max": 0.002070765132884844, "clip_ratio/high_mean": 0.0007927674223537906, "clip_ratio/low_mean": 0.0006206258149177302, "clip_ratio/low_min": 2.1570318494923413e-05, "clip_ratio/region_mean": 0.0014133932381810155, "epoch": 0.2752989209682123, "grad_norm": 0.1325443834066391, "learning_rate": 1e-06, "loss": 0.0084, "step": 118 }, { "clip_ratio/high_max": 0.0020533400384010747, "clip_ratio/high_mean": 0.0008499540062985034, "clip_ratio/low_mean": 0.0007282367914740462, "clip_ratio/low_min": 2.2806200831837486e-05, "clip_ratio/region_mean": 0.0015781907859491184, "epoch": 0.2776319626713328, "grad_norm": 0.11909475177526474, "learning_rate": 1e-06, "loss": 0.0083, "step": 119 }, { "clip_ratio/high_max": 0.0023961420301930048, "clip_ratio/high_mean": 0.001008371458738111, "clip_ratio/low_mean": 0.0007589720225951169, "clip_ratio/low_min": 2.2806200831837486e-05, "clip_ratio/region_mean": 0.0017673434922471642, "epoch": 0.2799650043744532, "grad_norm": 0.13036459684371948, "learning_rate": 1e-06, "loss": 0.0083, "step": 120 }, { "clip_ratio/high_max": 0.0019422599652898498, "clip_ratio/high_mean": 0.0007950556573632639, "clip_ratio/low_mean": 0.0006855169176560594, "clip_ratio/low_min": 6.888863936183043e-05, "clip_ratio/region_mean": 0.0014805725732003339, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3445.0, "completions/mean_length": 853.7913208007812, "completions/mean_terminated_length": 690.3505249023438, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.28229804607757364, "grad_norm": 0.13100755214691162, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 18922623.0, "reward": 0.5569196939468384, "reward_std": 0.22628207504749298, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 121 }, { "clip_ratio/high_max": 0.001962365240615327, "clip_ratio/high_mean": 0.0008065595593507169, "clip_ratio/low_mean": 0.000690796303388197, "clip_ratio/low_min": 4.447165883902926e-05, "clip_ratio/region_mean": 0.0014973558427300304, "epoch": 0.28463108778069407, "grad_norm": 0.13046561181545258, "learning_rate": 1e-06, "loss": -0.0077, "step": 122 }, { "clip_ratio/high_max": 0.002391522124526091, "clip_ratio/high_mean": 0.000978577607384068, "clip_ratio/low_mean": 0.0008066749633144354, "clip_ratio/low_min": 8.461663946945919e-05, "clip_ratio/region_mean": 0.0017852525925263762, "epoch": 0.2869641294838145, "grad_norm": 0.12878692150115967, "learning_rate": 1e-06, "loss": -0.0078, "step": 123 }, { "clip_ratio/high_max": 0.0021044093155069277, "clip_ratio/high_mean": 0.000915544760573539, "clip_ratio/low_mean": 0.0008358539853361435, "clip_ratio/low_min": 0.0001079088060578215, "clip_ratio/region_mean": 0.0017513987404527143, "epoch": 0.289297171186935, "grad_norm": 0.12763890624046326, "learning_rate": 1e-06, "loss": -0.0079, "step": 124 }, { "clip_ratio/high_max": 0.0021675781972589903, "clip_ratio/high_mean": 0.0008251405579358106, "clip_ratio/low_mean": 0.0005644272914651083, "clip_ratio/low_min": 3.8620859413640574e-05, "clip_ratio/region_mean": 0.0013895678712287918, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3115.0, "completions/mean_length": 719.3582763671875, "completions/mean_terminated_length": 622.439697265625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.2916302128900554, "grad_norm": 0.12874004244804382, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 19555608.0, "reward": 0.578125, "reward_std": 0.1963317096233368, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 125 }, { "clip_ratio/high_max": 0.002080680977087468, "clip_ratio/high_mean": 0.0008801951589703094, "clip_ratio/low_mean": 0.0007046141217870172, "clip_ratio/low_min": 7.819732491043396e-05, "clip_ratio/region_mean": 0.0015848092807573266, "epoch": 0.29396325459317585, "grad_norm": 0.12328380346298218, "learning_rate": 1e-06, "loss": 0.0007, "step": 126 }, { "clip_ratio/high_max": 0.0022875986323924735, "clip_ratio/high_mean": 0.0009358964089187793, "clip_ratio/low_mean": 0.0007387275545625016, "clip_ratio/low_min": 5.678502111550188e-05, "clip_ratio/region_mean": 0.0016746239707572386, "epoch": 0.2962962962962963, "grad_norm": 0.12323366105556488, "learning_rate": 1e-06, "loss": 0.0006, "step": 127 }, { "clip_ratio/high_max": 0.002229322293715086, "clip_ratio/high_mean": 0.0009242555770470062, "clip_ratio/low_mean": 0.000745340312278131, "clip_ratio/low_min": 4.2389624468341935e-05, "clip_ratio/region_mean": 0.0016695959056960419, "epoch": 0.29862933799941677, "grad_norm": 0.12143292278051376, "learning_rate": 1e-06, "loss": 0.0005, "step": 128 }, { "clip_ratio/high_max": 0.002339541257242672, "clip_ratio/high_mean": 0.0009401448533026269, "clip_ratio/low_mean": 0.0005960462058283156, "clip_ratio/low_min": 1.5225335118884686e-05, "clip_ratio/region_mean": 0.0015361910554929636, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2477.0, "completions/mean_length": 654.325927734375, "completions/mean_terminated_length": 567.693359375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.3009623797025372, "grad_norm": 0.14676012098789215, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 20138308.0, "reward": 0.5714285969734192, "reward_std": 0.19468063116073608, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 129 }, { "clip_ratio/high_max": 0.002325139124877751, "clip_ratio/high_mean": 0.0010271547507727519, "clip_ratio/low_mean": 0.0007374870674539125, "clip_ratio/low_min": 3.389239827811252e-05, "clip_ratio/region_mean": 0.0017646418564254418, "epoch": 0.30329542140565763, "grad_norm": 0.14097058773040771, "learning_rate": 1e-06, "loss": 0.0061, "step": 130 }, { "clip_ratio/high_max": 0.002222369526862167, "clip_ratio/high_mean": 0.0010043207548733335, "clip_ratio/low_mean": 0.0007855225940147648, "clip_ratio/low_min": 5.6448303439537995e-05, "clip_ratio/region_mean": 0.0017898433579830453, "epoch": 0.30562846310877806, "grad_norm": 0.14064575731754303, "learning_rate": 1e-06, "loss": 0.0061, "step": 131 }, { "clip_ratio/high_max": 0.0028451970210880972, "clip_ratio/high_mean": 0.0012023734852846246, "clip_ratio/low_mean": 0.0008807345639070263, "clip_ratio/low_min": 3.4181022783741355e-05, "clip_ratio/region_mean": 0.002083108018268831, "epoch": 0.3079615048118985, "grad_norm": 0.13920998573303223, "learning_rate": 1e-06, "loss": 0.0059, "step": 132 }, { "clip_ratio/high_max": 0.002074562002235325, "clip_ratio/high_mean": 0.0008266808781627333, "clip_ratio/low_mean": 0.0007156938008847646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001542374669952551, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 746.708740234375, "completions/mean_terminated_length": 630.6824340820312, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.310294546515019, "grad_norm": 0.12806402146816254, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 20767871.0, "reward": 0.5569196939468384, "reward_std": 0.20918434858322144, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 133 }, { "clip_ratio/high_max": 0.0023423739803547505, "clip_ratio/high_mean": 0.0008511146115779411, "clip_ratio/low_mean": 0.0008538950714864768, "clip_ratio/low_min": 2.5773195375222713e-05, "clip_ratio/region_mean": 0.0017050096721504815, "epoch": 0.3126275882181394, "grad_norm": 0.12568287551403046, "learning_rate": 1e-06, "loss": -0.0028, "step": 134 }, { "clip_ratio/high_max": 0.0021231973478279542, "clip_ratio/high_mean": 0.0008977587403933285, "clip_ratio/low_mean": 0.0007853490715206135, "clip_ratio/low_min": 1.7399777789250948e-05, "clip_ratio/region_mean": 0.0016831077882670797, "epoch": 0.31496062992125984, "grad_norm": 0.12344243377447128, "learning_rate": 1e-06, "loss": -0.0029, "step": 135 }, { "clip_ratio/high_max": 0.002262401656480506, "clip_ratio/high_mean": 0.0008841121898512938, "clip_ratio/low_mean": 0.0009233427481376566, "clip_ratio/low_min": 3.8600910556851886e-05, "clip_ratio/region_mean": 0.0018074549443554133, "epoch": 0.31729367162438027, "grad_norm": 0.12457186728715897, "learning_rate": 1e-06, "loss": -0.003, "step": 136 }, { "clip_ratio/high_max": 0.0017035833807312883, "clip_ratio/high_mean": 0.0006764722184016136, "clip_ratio/low_mean": 0.00061262423878361, "clip_ratio/low_min": 1.1888910194102209e-05, "clip_ratio/region_mean": 0.0012890964644611813, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 774.9442138671875, "completions/mean_terminated_length": 647.9512939453125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.31962671332750076, "grad_norm": 0.12719275057315826, "learning_rate": 1e-06, "loss": -0.014, "num_tokens": 21410709.0, "reward": 0.6171875, "reward_std": 0.18092647194862366, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 137 }, { "clip_ratio/high_max": 0.001793118855857756, "clip_ratio/high_mean": 0.0007163112604757771, "clip_ratio/low_mean": 0.0006525832704937784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001368894529150566, "epoch": 0.3219597550306212, "grad_norm": 0.12043658643960953, "learning_rate": 1e-06, "loss": -0.0141, "step": 138 }, { "clip_ratio/high_max": 0.002125433980836533, "clip_ratio/high_mean": 0.0007956805075082229, "clip_ratio/low_mean": 0.0007727206029812805, "clip_ratio/low_min": 2.1910210307396483e-05, "clip_ratio/region_mean": 0.0015684011232224293, "epoch": 0.3242927967337416, "grad_norm": 0.11943415552377701, "learning_rate": 1e-06, "loss": -0.0142, "step": 139 }, { "clip_ratio/high_max": 0.0019271391865913756, "clip_ratio/high_mean": 0.0007722183345322264, "clip_ratio/low_mean": 0.0007957258312671911, "clip_ratio/low_min": 1.2545162462629378e-05, "clip_ratio/region_mean": 0.001567944149428513, "epoch": 0.32662583843686205, "grad_norm": 0.11618844419717789, "learning_rate": 1e-06, "loss": -0.0142, "step": 140 }, { "clip_ratio/high_max": 0.002122580088325776, "clip_ratio/high_mean": 0.00077815698023187, "clip_ratio/low_mean": 0.0006361114592436934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014142684158287011, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3628.0, "completions/mean_length": 764.8326416015625, "completions/mean_terminated_length": 657.3755493164062, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.3289588801399825, "grad_norm": 0.13064908981323242, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 22074135.0, "reward": 0.578125, "reward_std": 0.20703540742397308, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 141 }, { "clip_ratio/high_max": 0.0023100773614714853, "clip_ratio/high_mean": 0.0008804631597740809, "clip_ratio/low_mean": 0.0006898274577906705, "clip_ratio/low_min": 2.217207020294154e-05, "clip_ratio/region_mean": 0.001570290609379299, "epoch": 0.33129192184310297, "grad_norm": 0.12526625394821167, "learning_rate": 1e-06, "loss": -0.0082, "step": 142 }, { "clip_ratio/high_max": 0.0023910438758321106, "clip_ratio/high_mean": 0.0009641088727221359, "clip_ratio/low_mean": 0.0007836988825147273, "clip_ratio/low_min": 3.2751601793279406e-05, "clip_ratio/region_mean": 0.0017478078079875559, "epoch": 0.3336249635462234, "grad_norm": 0.12329889833927155, "learning_rate": 1e-06, "loss": -0.0083, "step": 143 }, { "clip_ratio/high_max": 0.002503348827303853, "clip_ratio/high_mean": 0.000945994550420437, "clip_ratio/low_mean": 0.0007822333500371315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017282279368373565, "epoch": 0.3359580052493438, "grad_norm": 0.12147260457277298, "learning_rate": 1e-06, "loss": -0.0084, "step": 144 }, { "clip_ratio/high_max": 0.002724615187617019, "clip_ratio/high_mean": 0.0009305213861807715, "clip_ratio/low_mean": 0.0004744776078950963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014049989586055744, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3472.0, "completions/mean_length": 750.0614013671875, "completions/mean_terminated_length": 618.0869750976562, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.33829104695246426, "grad_norm": 0.13575716316699982, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 22683654.0, "reward": 0.574776828289032, "reward_std": 0.1903962343931198, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 145 }, { "clip_ratio/high_max": 0.002185089590057032, "clip_ratio/high_mean": 0.0008286447809950914, "clip_ratio/low_mean": 0.0005889495369046926, "clip_ratio/low_min": 1.4955730875954032e-05, "clip_ratio/region_mean": 0.0014175943288137205, "epoch": 0.34062408865558474, "grad_norm": 0.1281360387802124, "learning_rate": 1e-06, "loss": 0.0071, "step": 146 }, { "clip_ratio/high_max": 0.002632475574500859, "clip_ratio/high_mean": 0.0009169963886961341, "clip_ratio/low_mean": 0.0006690081063425168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001586004495038651, "epoch": 0.3429571303587052, "grad_norm": 0.1222074031829834, "learning_rate": 1e-06, "loss": 0.0069, "step": 147 }, { "clip_ratio/high_max": 0.00267785057076253, "clip_ratio/high_mean": 0.001006237533147214, "clip_ratio/low_mean": 0.00073460708699713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017408446219633333, "epoch": 0.3452901720618256, "grad_norm": 0.12618333101272583, "learning_rate": 1e-06, "loss": 0.0069, "step": 148 }, { "clip_ratio/high_max": 0.00190613632730674, "clip_ratio/high_mean": 0.0007203724526334554, "clip_ratio/low_mean": 0.000510818661950907, "clip_ratio/low_min": 1.030163184623234e-05, "clip_ratio/region_mean": 0.001231191105034668, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 818.0469360351562, "completions/mean_terminated_length": 640.6517333984375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.34762321376494604, "grad_norm": 0.12450145184993744, "learning_rate": 1e-06, "loss": -0.0186, "num_tokens": 23317544.0, "reward": 0.5613839626312256, "reward_std": 0.18888963758945465, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 149 }, { "clip_ratio/high_max": 0.002088987472234294, "clip_ratio/high_mean": 0.0008011059107957408, "clip_ratio/low_mean": 0.0006028610132489121, "clip_ratio/low_min": 9.02136252989294e-06, "clip_ratio/region_mean": 0.0014039668822078966, "epoch": 0.34995625546806647, "grad_norm": 0.12246193736791611, "learning_rate": 1e-06, "loss": -0.0187, "step": 150 }, { "clip_ratio/high_max": 0.0020701827234006487, "clip_ratio/high_mean": 0.0008531266612408217, "clip_ratio/low_mean": 0.0006338480789054302, "clip_ratio/low_min": 9.02136252989294e-06, "clip_ratio/region_mean": 0.0014869747174088843, "epoch": 0.35228929717118695, "grad_norm": 0.1253868043422699, "learning_rate": 1e-06, "loss": -0.0188, "step": 151 }, { "clip_ratio/high_max": 0.002418582298560068, "clip_ratio/high_mean": 0.000833303247418371, "clip_ratio/low_mean": 0.0006955755570743349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015288787908502854, "epoch": 0.3546223388743074, "grad_norm": 0.11809121072292328, "learning_rate": 1e-06, "loss": -0.0189, "step": 152 }, { "clip_ratio/high_max": 0.0019493377112667076, "clip_ratio/high_mean": 0.0007576894513476873, "clip_ratio/low_mean": 0.0006590703651454533, "clip_ratio/low_min": 2.819972905854229e-05, "clip_ratio/region_mean": 0.001416759827407077, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3491.0, "completions/mean_length": 725.9308471679688, "completions/mean_terminated_length": 641.1006469726562, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.3569553805774278, "grad_norm": 0.14228075742721558, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 23973730.0, "reward": 0.5658482313156128, "reward_std": 0.19474662840366364, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 153 }, { "clip_ratio/high_max": 0.0019538468186510727, "clip_ratio/high_mean": 0.0007908128627605038, "clip_ratio/low_mean": 0.0007207706548797432, "clip_ratio/low_min": 8.471130058751442e-06, "clip_ratio/region_mean": 0.0015115835485630669, "epoch": 0.35928842228054825, "grad_norm": 0.13228096067905426, "learning_rate": 1e-06, "loss": -0.0066, "step": 154 }, { "clip_ratio/high_max": 0.002330339870241005, "clip_ratio/high_mean": 0.0009021662208397174, "clip_ratio/low_mean": 0.0007463944275514223, "clip_ratio/low_min": 4.235565211274661e-05, "clip_ratio/region_mean": 0.0016485606392961927, "epoch": 0.36162146398366873, "grad_norm": 0.13084569573402405, "learning_rate": 1e-06, "loss": -0.0068, "step": 155 }, { "clip_ratio/high_max": 0.002105721512634773, "clip_ratio/high_mean": 0.0009035752354975557, "clip_ratio/low_mean": 0.0009649036255723331, "clip_ratio/low_min": 2.9032633392489515e-05, "clip_ratio/region_mean": 0.0018684788446989842, "epoch": 0.36395450568678916, "grad_norm": 0.12879055738449097, "learning_rate": 1e-06, "loss": -0.0068, "step": 156 }, { "clip_ratio/high_max": 0.0015746948411106132, "clip_ratio/high_mean": 0.0006798055710532935, "clip_ratio/low_mean": 0.0005838424722242053, "clip_ratio/low_min": 2.731415497692069e-05, "clip_ratio/region_mean": 0.0012636480678338557, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 793.9364013671875, "completions/mean_terminated_length": 639.6343383789062, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.3662875473899096, "grad_norm": 0.13985928893089294, "learning_rate": 1e-06, "loss": -0.0193, "num_tokens": 24623609.0, "reward": 0.5602678656578064, "reward_std": 0.18952833116054535, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 157 }, { "clip_ratio/high_max": 0.001851902954513207, "clip_ratio/high_mean": 0.000682261723341071, "clip_ratio/low_mean": 0.0007348936906055314, "clip_ratio/low_min": 3.631099025369622e-05, "clip_ratio/region_mean": 0.001417155443050433, "epoch": 0.36862058909303, "grad_norm": 0.13645322620868683, "learning_rate": 1e-06, "loss": -0.0194, "step": 158 }, { "clip_ratio/high_max": 0.0020943659255863167, "clip_ratio/high_mean": 0.0008832562361931195, "clip_ratio/low_mean": 0.0007395212705887388, "clip_ratio/low_min": 2.9459431971190497e-05, "clip_ratio/region_mean": 0.0016227774758590385, "epoch": 0.37095363079615046, "grad_norm": 0.1287633627653122, "learning_rate": 1e-06, "loss": -0.0196, "step": 159 }, { "clip_ratio/high_max": 0.0018498341160011478, "clip_ratio/high_mean": 0.0007322655565076275, "clip_ratio/low_mean": 0.0008210696341848234, "clip_ratio/low_min": 3.7399209759314544e-05, "clip_ratio/region_mean": 0.0015533351943304297, "epoch": 0.37328667249927094, "grad_norm": 0.1238635703921318, "learning_rate": 1e-06, "loss": -0.0196, "step": 160 }, { "clip_ratio/high_max": 0.00184460541640874, "clip_ratio/high_mean": 0.0007242668416438391, "clip_ratio/low_mean": 0.0008007000051293289, "clip_ratio/low_min": 5.862940815859474e-05, "clip_ratio/region_mean": 0.0015249668431351893, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3682.0, "completions/mean_length": 879.4788208007812, "completions/mean_terminated_length": 713.3673706054688, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.3756197142023914, "grad_norm": 0.1327405571937561, "learning_rate": 1e-06, "loss": -0.0121, "num_tokens": 25333182.0, "reward": 0.5167410969734192, "reward_std": 0.22410036623477936, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 161 }, { "clip_ratio/high_max": 0.0020582498254952952, "clip_ratio/high_mean": 0.000758121374019538, "clip_ratio/low_mean": 0.0008528279886377277, "clip_ratio/low_min": 7.029004973446717e-05, "clip_ratio/region_mean": 0.0016109493881231174, "epoch": 0.3779527559055118, "grad_norm": 0.1290382295846939, "learning_rate": 1e-06, "loss": -0.0123, "step": 162 }, { "clip_ratio/high_max": 0.0019149773215758614, "clip_ratio/high_mean": 0.000807310025265906, "clip_ratio/low_mean": 0.0009338400886917952, "clip_ratio/low_min": 8.590506240579998e-05, "clip_ratio/region_mean": 0.0017411500739399344, "epoch": 0.38028579760863224, "grad_norm": 0.13059088587760925, "learning_rate": 1e-06, "loss": -0.0123, "step": 163 }, { "clip_ratio/high_max": 0.0021376777585828677, "clip_ratio/high_mean": 0.0008885195493348874, "clip_ratio/low_mean": 0.0010482841644261498, "clip_ratio/low_min": 0.000126584418467246, "clip_ratio/region_mean": 0.0019368036955711432, "epoch": 0.3826188393117527, "grad_norm": 0.12513965368270874, "learning_rate": 1e-06, "loss": -0.0124, "step": 164 }, { "clip_ratio/high_max": 0.0016817022988107055, "clip_ratio/high_mean": 0.0006793460934204631, "clip_ratio/low_mean": 0.00046086348720564274, "clip_ratio/low_min": 1.2590652659127954e-05, "clip_ratio/region_mean": 0.0011402095606172225, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 778.8438110351562, "completions/mean_terminated_length": 635.9627685546875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.38495188101487315, "grad_norm": 0.13212376832962036, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 25970674.0, "reward": 0.5368303656578064, "reward_std": 0.18077422678470612, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 165 }, { "clip_ratio/high_max": 0.0022930297564016655, "clip_ratio/high_mean": 0.0008691501479916042, "clip_ratio/low_mean": 0.0005739691732742358, "clip_ratio/low_min": 3.3840196920209564e-05, "clip_ratio/region_mean": 0.0014431193521886598, "epoch": 0.3872849227179936, "grad_norm": 0.1238618865609169, "learning_rate": 1e-06, "loss": -0.0032, "step": 166 }, { "clip_ratio/high_max": 0.0022254820833040867, "clip_ratio/high_mean": 0.0008842927345540375, "clip_ratio/low_mean": 0.0006689161027679802, "clip_ratio/low_min": 4.755396457767347e-05, "clip_ratio/region_mean": 0.0015532088254985865, "epoch": 0.389617964421114, "grad_norm": 0.12629103660583496, "learning_rate": 1e-06, "loss": -0.0033, "step": 167 }, { "clip_ratio/high_max": 0.0022610036648984533, "clip_ratio/high_mean": 0.0008592117410444189, "clip_ratio/low_mean": 0.0006936555528227473, "clip_ratio/low_min": 2.273588688694872e-05, "clip_ratio/region_mean": 0.0015528673357039224, "epoch": 0.39195100612423445, "grad_norm": 0.12537162005901337, "learning_rate": 1e-06, "loss": -0.0034, "step": 168 }, { "clip_ratio/high_max": 0.0017853228564490564, "clip_ratio/high_mean": 0.0006730290733685251, "clip_ratio/low_mean": 0.000531001284798549, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012040303336107172, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 786.5301513671875, "completions/mean_terminated_length": 659.9802856445312, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 0.39428404782735493, "grad_norm": 0.12514810264110565, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 26626373.0, "reward": 0.5814732313156128, "reward_std": 0.19271966814994812, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 169 }, { "clip_ratio/high_max": 0.0018870870626415126, "clip_ratio/high_mean": 0.0007161348257795908, "clip_ratio/low_mean": 0.0005770080551883439, "clip_ratio/low_min": 1.387963584420504e-05, "clip_ratio/region_mean": 0.0012931428973388392, "epoch": 0.39661708953047536, "grad_norm": 0.12092510610818863, "learning_rate": 1e-06, "loss": 0.0068, "step": 170 }, { "clip_ratio/high_max": 0.0019517243599693757, "clip_ratio/high_mean": 0.000787295693953638, "clip_ratio/low_mean": 0.0006512152140203398, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014385109061549883, "epoch": 0.3989501312335958, "grad_norm": 0.11815723031759262, "learning_rate": 1e-06, "loss": 0.0067, "step": 171 }, { "clip_ratio/high_max": 0.002110590663505718, "clip_ratio/high_mean": 0.0008060213622229639, "clip_ratio/low_mean": 0.000834345299153938, "clip_ratio/low_min": 2.3625023459317163e-05, "clip_ratio/region_mean": 0.0016403667104896158, "epoch": 0.4012831729367162, "grad_norm": 0.11461564153432846, "learning_rate": 1e-06, "loss": 0.0066, "step": 172 }, { "clip_ratio/high_max": 0.0018739587612799369, "clip_ratio/high_mean": 0.0008143357408698648, "clip_ratio/low_mean": 0.0005802317482448416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013945674836577382, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3347.0, "completions/mean_length": 674.8928833007812, "completions/mean_terminated_length": 568.598388671875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.4036162146398367, "grad_norm": 0.1492396742105484, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 27206677.0, "reward": 0.6305803656578064, "reward_std": 0.1899854838848114, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 173 }, { "clip_ratio/high_max": 0.002043466462055221, "clip_ratio/high_mean": 0.0007959864706208464, "clip_ratio/low_mean": 0.0006736974482919322, "clip_ratio/low_min": 3.210840714018559e-05, "clip_ratio/region_mean": 0.0014696838989038952, "epoch": 0.40594925634295714, "grad_norm": 0.14456047117710114, "learning_rate": 1e-06, "loss": -0.0029, "step": 174 }, { "clip_ratio/high_max": 0.00228326039359672, "clip_ratio/high_mean": 0.0008906363036658149, "clip_ratio/low_mean": 0.0007616474686074071, "clip_ratio/low_min": 5.593298556050286e-05, "clip_ratio/region_mean": 0.0016522837686352432, "epoch": 0.4082822980460776, "grad_norm": 0.13371388614177704, "learning_rate": 1e-06, "loss": -0.0031, "step": 175 }, { "clip_ratio/high_max": 0.0022177738428581506, "clip_ratio/high_mean": 0.0009611998066247907, "clip_ratio/low_mean": 0.000822845129732741, "clip_ratio/low_min": 5.053765562479384e-05, "clip_ratio/region_mean": 0.0017840449218056165, "epoch": 0.410615339749198, "grad_norm": 0.1360509842634201, "learning_rate": 1e-06, "loss": -0.0032, "step": 176 }, { "clip_ratio/high_max": 0.002124106278643012, "clip_ratio/high_mean": 0.0008225907986343373, "clip_ratio/low_mean": 0.0005985241568851052, "clip_ratio/low_min": 9.040937584359199e-06, "clip_ratio/region_mean": 0.0014211149864422623, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3423.0, "completions/mean_length": 892.2913208007812, "completions/mean_terminated_length": 678.7107543945312, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.41294838145231844, "grad_norm": 0.1452936828136444, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 27870186.0, "reward": 0.5602678656578064, "reward_std": 0.20553021132946014, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 177 }, { "clip_ratio/high_max": 0.002294248108228203, "clip_ratio/high_mean": 0.0008501646425429499, "clip_ratio/low_mean": 0.0006722501384501811, "clip_ratio/low_min": 2.3255814085132442e-05, "clip_ratio/region_mean": 0.001522414808277972, "epoch": 0.4152814231554389, "grad_norm": 0.12737055122852325, "learning_rate": 1e-06, "loss": -0.0033, "step": 178 }, { "clip_ratio/high_max": 0.002159130664949771, "clip_ratio/high_mean": 0.0008800446594250388, "clip_ratio/low_mean": 0.0007663468113605632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016463914653286338, "epoch": 0.41761446485855935, "grad_norm": 0.12789689004421234, "learning_rate": 1e-06, "loss": -0.0034, "step": 179 }, { "clip_ratio/high_max": 0.002041347252088599, "clip_ratio/high_mean": 0.0008498054048686754, "clip_ratio/low_mean": 0.0008477169776597293, "clip_ratio/low_min": 3.719500091392547e-05, "clip_ratio/region_mean": 0.001697522406175267, "epoch": 0.4199475065616798, "grad_norm": 0.12442026287317276, "learning_rate": 1e-06, "loss": -0.0035, "step": 180 }, { "clip_ratio/high_max": 0.0019027201778953895, "clip_ratio/high_mean": 0.0007205671536212321, "clip_ratio/low_mean": 0.0006100571554270573, "clip_ratio/low_min": 9.326810777565697e-05, "clip_ratio/region_mean": 0.0013306243490660563, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3350.0, "completions/mean_length": 823.966552734375, "completions/mean_terminated_length": 718.4170532226562, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.4222805482648002, "grad_norm": 0.129314124584198, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 28582220.0, "reward": 0.5881696939468384, "reward_std": 0.2040320485830307, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 181 }, { "clip_ratio/high_max": 0.002060704668110702, "clip_ratio/high_mean": 0.0007384509608527878, "clip_ratio/low_mean": 0.0006915073245181702, "clip_ratio/low_min": 6.692930037388578e-05, "clip_ratio/region_mean": 0.0014299583017418627, "epoch": 0.4246135899679207, "grad_norm": 0.12345974147319794, "learning_rate": 1e-06, "loss": -0.0077, "step": 182 }, { "clip_ratio/high_max": 0.0019877305785485078, "clip_ratio/high_mean": 0.0007581515274068806, "clip_ratio/low_mean": 0.0007056527174427174, "clip_ratio/low_min": 2.8040110009897035e-05, "clip_ratio/region_mean": 0.0014638042521255556, "epoch": 0.42694663167104113, "grad_norm": 0.12144620716571808, "learning_rate": 1e-06, "loss": -0.0078, "step": 183 }, { "clip_ratio/high_max": 0.0024087979982141405, "clip_ratio/high_mean": 0.0008610365421191091, "clip_ratio/low_mean": 0.0009245934470527573, "clip_ratio/low_min": 9.265759308618726e-05, "clip_ratio/region_mean": 0.001785629996447824, "epoch": 0.42927967337416156, "grad_norm": 0.11682870984077454, "learning_rate": 1e-06, "loss": -0.0079, "step": 184 }, { "clip_ratio/high_max": 0.0020106467163714115, "clip_ratio/high_mean": 0.0007398923835353344, "clip_ratio/low_mean": 0.0005716584437323036, "clip_ratio/low_min": 3.746440779650584e-05, "clip_ratio/region_mean": 0.001311550811806228, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3989.0, "completions/mean_length": 838.8582763671875, "completions/mean_terminated_length": 694.6026000976562, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.431612715077282, "grad_norm": 0.14958661794662476, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 29263085.0, "reward": 0.5580357313156128, "reward_std": 0.18321877717971802, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 185 }, { "clip_ratio/high_max": 0.0017594487399037462, "clip_ratio/high_mean": 0.0006725644489051774, "clip_ratio/low_mean": 0.0006287496998993447, "clip_ratio/low_min": 2.8168079552415293e-05, "clip_ratio/region_mean": 0.001301314143347554, "epoch": 0.4339457567804024, "grad_norm": 0.140171617269516, "learning_rate": 1e-06, "loss": 0.0137, "step": 186 }, { "clip_ratio/high_max": 0.002286356386321131, "clip_ratio/high_mean": 0.0007630773252458312, "clip_ratio/low_mean": 0.0007503039378207177, "clip_ratio/low_min": 6.248682439036202e-05, "clip_ratio/region_mean": 0.0015133812739804853, "epoch": 0.4362787984835229, "grad_norm": 0.1305026262998581, "learning_rate": 1e-06, "loss": 0.0136, "step": 187 }, { "clip_ratio/high_max": 0.002336096396902576, "clip_ratio/high_mean": 0.0008277772612927947, "clip_ratio/low_mean": 0.0007938727831060532, "clip_ratio/low_min": 2.8168079552415293e-05, "clip_ratio/region_mean": 0.0016216500371228904, "epoch": 0.43861184018664334, "grad_norm": 0.24401448667049408, "learning_rate": 1e-06, "loss": 0.0135, "step": 188 }, { "clip_ratio/high_max": 0.0017940857069334015, "clip_ratio/high_mean": 0.0007045862730592489, "clip_ratio/low_mean": 0.0005715291481465101, "clip_ratio/low_min": 5.0179610298073385e-05, "clip_ratio/region_mean": 0.001276115439395653, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 769.1484985351562, "completions/mean_terminated_length": 641.9339599609375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.4409448818897638, "grad_norm": 0.14372509717941284, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 29911722.0, "reward": 0.527901828289032, "reward_std": 0.16383378207683563, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 189 }, { "clip_ratio/high_max": 0.0020170461139059626, "clip_ratio/high_mean": 0.0007346528773268801, "clip_ratio/low_mean": 0.000671168108965503, "clip_ratio/low_min": 5.0430287956260145e-05, "clip_ratio/region_mean": 0.001405820992658846, "epoch": 0.4432779235928842, "grad_norm": 0.13657039403915405, "learning_rate": 1e-06, "loss": -0.0055, "step": 190 }, { "clip_ratio/high_max": 0.002069996808131691, "clip_ratio/high_mean": 0.0007594603575853398, "clip_ratio/low_mean": 0.0007773432280373527, "clip_ratio/low_min": 0.00011093665398220764, "clip_ratio/region_mean": 0.0015368036365543958, "epoch": 0.4456109652960047, "grad_norm": 0.13015340268611908, "learning_rate": 1e-06, "loss": -0.0056, "step": 191 }, { "clip_ratio/high_max": 0.0024807835070532747, "clip_ratio/high_mean": 0.0008477457049593795, "clip_ratio/low_mean": 0.0009115721677517286, "clip_ratio/low_min": 0.00013439590838970616, "clip_ratio/region_mean": 0.0017593178854440339, "epoch": 0.4479440069991251, "grad_norm": 0.12345610558986664, "learning_rate": 1e-06, "loss": -0.0057, "step": 192 }, { "clip_ratio/high_max": 0.002059784186712932, "clip_ratio/high_mean": 0.0007039611591608264, "clip_ratio/low_mean": 0.00044292151869740337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011468826705822721, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3276.0, "completions/mean_length": 841.3225708007812, "completions/mean_terminated_length": 665.18701171875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.45027704870224555, "grad_norm": 0.13144679367542267, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 30560147.0, "reward": 0.5334821939468384, "reward_std": 0.16375526785850525, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 193 }, { "clip_ratio/high_max": 0.0018594775683595799, "clip_ratio/high_mean": 0.0006484556197392521, "clip_ratio/low_mean": 0.0005304711648932425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011789267809945159, "epoch": 0.452610090405366, "grad_norm": 0.13114747405052185, "learning_rate": 1e-06, "loss": -0.0018, "step": 194 }, { "clip_ratio/high_max": 0.0020697786021628417, "clip_ratio/high_mean": 0.0007702046586928191, "clip_ratio/low_mean": 0.0006775559222660377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014477605509455316, "epoch": 0.4549431321084864, "grad_norm": 0.11965741217136383, "learning_rate": 1e-06, "loss": -0.0019, "step": 195 }, { "clip_ratio/high_max": 0.0020117703825235367, "clip_ratio/high_mean": 0.0008047625196923036, "clip_ratio/low_mean": 0.0007498264167224988, "clip_ratio/low_min": 8.457374860881828e-06, "clip_ratio/region_mean": 0.0015545889473287389, "epoch": 0.4572761738116069, "grad_norm": 0.11865903437137604, "learning_rate": 1e-06, "loss": -0.002, "step": 196 }, { "clip_ratio/high_max": 0.001973409322090447, "clip_ratio/high_mean": 0.0008260949562099995, "clip_ratio/low_mean": 0.0005826244578202022, "clip_ratio/low_min": 3.357206878717989e-05, "clip_ratio/region_mean": 0.0014087194176681805, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3378.0, "completions/mean_length": 865.6563110351562, "completions/mean_terminated_length": 617.1682739257812, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.45960921551472733, "grad_norm": 0.1397593766450882, "learning_rate": 1e-06, "loss": -0.0184, "num_tokens": 31176167.0, "reward": 0.5837053656578064, "reward_std": 0.17923468351364136, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 197 }, { "clip_ratio/high_max": 0.0021797248773509637, "clip_ratio/high_mean": 0.0008783432185737183, "clip_ratio/low_mean": 0.0006023327887305641, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014806760191277135, "epoch": 0.46194225721784776, "grad_norm": 0.12912380695343018, "learning_rate": 1e-06, "loss": -0.0185, "step": 198 }, { "clip_ratio/high_max": 0.0023172804503701627, "clip_ratio/high_mean": 0.0009436652053409489, "clip_ratio/low_mean": 0.0007070937062962912, "clip_ratio/low_min": 3.994886719738133e-05, "clip_ratio/region_mean": 0.0016507589389220811, "epoch": 0.4642752989209682, "grad_norm": 0.13248147070407867, "learning_rate": 1e-06, "loss": -0.0186, "step": 199 }, { "clip_ratio/high_max": 0.0022325859463308007, "clip_ratio/high_mean": 0.00096759192092577, "clip_ratio/low_mean": 0.0008682969000801677, "clip_ratio/low_min": 1.9974433598690666e-05, "clip_ratio/region_mean": 0.0018358888119109906, "epoch": 0.4666083406240887, "grad_norm": 0.12580174207687378, "learning_rate": 1e-06, "loss": -0.0187, "step": 200 }, { "clip_ratio/high_max": 0.0024492942029610276, "clip_ratio/high_mean": 0.001023486904159654, "clip_ratio/low_mean": 0.0004934865710310987, "clip_ratio/low_min": 1.4169122550811153e-05, "clip_ratio/region_mean": 0.0015169734688242897, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3804.0, "completions/mean_length": 859.7589721679688, "completions/mean_terminated_length": 660.3696899414062, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.4689413823272091, "grad_norm": 0.15095102787017822, "learning_rate": 1e-06, "loss": -0.0313, "num_tokens": 31814727.0, "reward": 0.6127232313156128, "reward_std": 0.20899143815040588, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 201 }, { "clip_ratio/high_max": 0.0025123211635218468, "clip_ratio/high_mean": 0.0010872416605707258, "clip_ratio/low_mean": 0.0006514722354040714, "clip_ratio/low_min": 3.0222437999327667e-05, "clip_ratio/region_mean": 0.0017387139305355959, "epoch": 0.47127442403032954, "grad_norm": 0.1395331770181656, "learning_rate": 1e-06, "loss": -0.0316, "step": 202 }, { "clip_ratio/high_max": 0.0027311075245961547, "clip_ratio/high_mean": 0.00116669383351109, "clip_ratio/low_mean": 0.0007452905229001772, "clip_ratio/low_min": 8.015868206712184e-05, "clip_ratio/region_mean": 0.0019119843418593518, "epoch": 0.47360746573345, "grad_norm": 0.13413439691066742, "learning_rate": 1e-06, "loss": -0.0316, "step": 203 }, { "clip_ratio/high_max": 0.0030890208508935757, "clip_ratio/high_mean": 0.0012640648164961021, "clip_ratio/low_mean": 0.0008750537090236321, "clip_ratio/low_min": 6.598955860681599e-05, "clip_ratio/region_mean": 0.0021391185364336707, "epoch": 0.4759405074365704, "grad_norm": 0.13072900474071503, "learning_rate": 1e-06, "loss": -0.0317, "step": 204 }, { "clip_ratio/high_max": 0.0024731029916438274, "clip_ratio/high_mean": 0.0009719484314700821, "clip_ratio/low_mean": 0.0005394243362388806, "clip_ratio/low_min": 1.9207129298592918e-05, "clip_ratio/region_mean": 0.0015113727567950264, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 762.114990234375, "completions/mean_terminated_length": 606.325927734375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.4782735491396909, "grad_norm": 0.15689398348331451, "learning_rate": 1e-06, "loss": -0.0126, "num_tokens": 32413430.0, "reward": 0.6540178656578064, "reward_std": 0.18352049589157104, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 205 }, { "clip_ratio/high_max": 0.002340481718420051, "clip_ratio/high_mean": 0.0009258022837457247, "clip_ratio/low_mean": 0.000656379442261823, "clip_ratio/low_min": 3.124024442513473e-05, "clip_ratio/region_mean": 0.001582181706908159, "epoch": 0.4806065908428113, "grad_norm": 0.1418277472257614, "learning_rate": 1e-06, "loss": -0.0127, "step": 206 }, { "clip_ratio/high_max": 0.002562680972914677, "clip_ratio/high_mean": 0.0010215032307314686, "clip_ratio/low_mean": 0.0006995973126322497, "clip_ratio/low_min": 9.615028466214426e-05, "clip_ratio/region_mean": 0.0017211005797435064, "epoch": 0.48293963254593175, "grad_norm": 0.1463276594877243, "learning_rate": 1e-06, "loss": -0.0129, "step": 207 }, { "clip_ratio/high_max": 0.0033894631633302197, "clip_ratio/high_mean": 0.0012309082667343318, "clip_ratio/low_mean": 0.0008384940992982592, "clip_ratio/low_min": 6.491004205599893e-05, "clip_ratio/region_mean": 0.0020694024133263156, "epoch": 0.4852726742490522, "grad_norm": 0.1418519765138626, "learning_rate": 1e-06, "loss": -0.013, "step": 208 }, { "clip_ratio/high_max": 0.0018435279271216132, "clip_ratio/high_mean": 0.0007196783644758398, "clip_ratio/low_mean": 0.0006421881944334018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013618665252579376, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 783.1473388671875, "completions/mean_terminated_length": 607.9671020507812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.48760571595217267, "grad_norm": 5.928381443023682, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 33016562.0, "reward": 0.5803571939468384, "reward_std": 0.17623132467269897, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 209 }, { "clip_ratio/high_max": 0.0017898829537443817, "clip_ratio/high_mean": 0.0007453729504049988, "clip_ratio/low_mean": 0.0007343273209698964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014797002950217575, "epoch": 0.4899387576552931, "grad_norm": 0.1544238179922104, "learning_rate": 1e-06, "loss": -0.0024, "step": 210 }, { "clip_ratio/high_max": 0.002238260833109962, "clip_ratio/high_mean": 0.0007978399080457166, "clip_ratio/low_mean": 0.0007594705894007348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015573105265502818, "epoch": 0.49227179935841353, "grad_norm": 0.1455371230840683, "learning_rate": 1e-06, "loss": -0.0025, "step": 211 }, { "clip_ratio/high_max": 0.00214235905878013, "clip_ratio/high_mean": 0.0008235550321842311, "clip_ratio/low_mean": 0.0008549741214665119, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016785291409178171, "epoch": 0.49460484106153396, "grad_norm": 0.13842235505580902, "learning_rate": 1e-06, "loss": -0.0027, "step": 212 }, { "clip_ratio/high_max": 0.0017898878395499196, "clip_ratio/high_mean": 0.0006285424624365987, "clip_ratio/low_mean": 0.0006639106959482888, "clip_ratio/low_min": 3.608545011957176e-05, "clip_ratio/region_mean": 0.0012924531511089299, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3151.0, "completions/mean_length": 702.2935791015625, "completions/mean_terminated_length": 547.8541259765625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.4969378827646544, "grad_norm": 0.1554422825574875, "learning_rate": 1e-06, "loss": 0.012, "num_tokens": 33575913.0, "reward": 0.5625, "reward_std": 0.16604438424110413, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 213 }, { "clip_ratio/high_max": 0.0019248179687565425, "clip_ratio/high_mean": 0.0006827788706686988, "clip_ratio/low_mean": 0.0008430743982899003, "clip_ratio/low_min": 7.887066749390215e-05, "clip_ratio/region_mean": 0.001525853276689304, "epoch": 0.4992709244677749, "grad_norm": 0.14568105340003967, "learning_rate": 1e-06, "loss": 0.0119, "step": 214 }, { "clip_ratio/high_max": 0.0017952773378056008, "clip_ratio/high_mean": 0.0007246034329000395, "clip_ratio/low_mean": 0.0009183736910927109, "clip_ratio/low_min": 4.517867273534648e-05, "clip_ratio/region_mean": 0.0016429771276307292, "epoch": 0.5016039661708953, "grad_norm": 0.1421547681093216, "learning_rate": 1e-06, "loss": 0.0118, "step": 215 }, { "clip_ratio/high_max": 0.0019080131496593822, "clip_ratio/high_mean": 0.0007503183014705428, "clip_ratio/low_mean": 0.0010296792552253464, "clip_ratio/low_min": 9.249549475498497e-05, "clip_ratio/region_mean": 0.0017799975903471932, "epoch": 0.5039370078740157, "grad_norm": 0.13322584331035614, "learning_rate": 1e-06, "loss": 0.0116, "step": 216 }, { "clip_ratio/high_max": 0.0017685059137875214, "clip_ratio/high_mean": 0.0008174906397471204, "clip_ratio/low_mean": 0.0007470436903531663, "clip_ratio/low_min": 7.736551015113946e-05, "clip_ratio/region_mean": 0.0015645343592041172, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3331.0, "completions/mean_length": 719.677490234375, "completions/mean_terminated_length": 606.743896484375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.5062700495771362, "grad_norm": 0.1658407598733902, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 34196352.0, "reward": 0.598214328289032, "reward_std": 0.2054649144411087, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 217 }, { "clip_ratio/high_max": 0.0024214774239226244, "clip_ratio/high_mean": 0.0009799158469832037, "clip_ratio/low_mean": 0.000788766663390561, "clip_ratio/low_min": 2.599880735942861e-05, "clip_ratio/region_mean": 0.00176868249400286, "epoch": 0.5086030912802566, "grad_norm": 0.1649574339389801, "learning_rate": 1e-06, "loss": -0.0008, "step": 218 }, { "clip_ratio/high_max": 0.0023510167084168643, "clip_ratio/high_mean": 0.0010496319428057177, "clip_ratio/low_mean": 0.0010225755686406046, "clip_ratio/low_min": 6.154934726509964e-05, "clip_ratio/region_mean": 0.0020722074841614813, "epoch": 0.510936132983377, "grad_norm": 0.15084660053253174, "learning_rate": 1e-06, "loss": -0.001, "step": 219 }, { "clip_ratio/high_max": 0.002698348558624275, "clip_ratio/high_mean": 0.0011581605722312815, "clip_ratio/low_mean": 0.001177403966721613, "clip_ratio/low_min": 0.00012235831582074752, "clip_ratio/region_mean": 0.0023355645607807674, "epoch": 0.5132691746864976, "grad_norm": 0.14769120514392853, "learning_rate": 1e-06, "loss": -0.0011, "step": 220 }, { "clip_ratio/high_max": 0.0019716924471140373, "clip_ratio/high_mean": 0.0006754030018782942, "clip_ratio/low_mean": 0.0005095736109979043, "clip_ratio/low_min": 1.4381040273292456e-05, "clip_ratio/region_mean": 0.0011849766015075147, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 804.4486694335938, "completions/mean_terminated_length": 654.6581420898438, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.515602216389618, "grad_norm": 0.14115387201309204, "learning_rate": 1e-06, "loss": -0.008, "num_tokens": 34848802.0, "reward": 0.543526828289032, "reward_std": 0.1671716272830963, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838003516197205, "step": 221 }, { "clip_ratio/high_max": 0.0018091392339556478, "clip_ratio/high_mean": 0.0006634820983890677, "clip_ratio/low_mean": 0.0005886766448384151, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012521587377705146, "epoch": 0.5179352580927384, "grad_norm": 0.12984482944011688, "learning_rate": 1e-06, "loss": -0.0081, "step": 222 }, { "clip_ratio/high_max": 0.002020530475419946, "clip_ratio/high_mean": 0.000810093524705735, "clip_ratio/low_mean": 0.0006467228358815191, "clip_ratio/low_min": 1.5679879652452655e-05, "clip_ratio/region_mean": 0.0014568163314834237, "epoch": 0.5202682997958589, "grad_norm": 0.13209229707717896, "learning_rate": 1e-06, "loss": -0.0082, "step": 223 }, { "clip_ratio/high_max": 0.0021202827992965467, "clip_ratio/high_mean": 0.0008127968831104226, "clip_ratio/low_mean": 0.0007972047105795355, "clip_ratio/low_min": 2.351981856918428e-05, "clip_ratio/region_mean": 0.001610001600056421, "epoch": 0.5226013414989793, "grad_norm": 0.12124840170145035, "learning_rate": 1e-06, "loss": -0.0083, "step": 224 }, { "clip_ratio/high_max": 0.0021031460055382922, "clip_ratio/high_mean": 0.0007448032674801652, "clip_ratio/low_mean": 0.0005476557635120116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012924590373586398, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3864.0, "completions/mean_length": 785.4676513671875, "completions/mean_terminated_length": 581.501220703125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.5249343832020997, "grad_norm": 0.15625295042991638, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 35426341.0, "reward": 0.5892857313156128, "reward_std": 0.16176040470600128, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 225 }, { "clip_ratio/high_max": 0.0018296801863471046, "clip_ratio/high_mean": 0.0007326061349886004, "clip_ratio/low_mean": 0.0006696919008390978, "clip_ratio/low_min": 1.9160024748998694e-05, "clip_ratio/region_mean": 0.0014022980285517406, "epoch": 0.5272674249052202, "grad_norm": 0.2065250426530838, "learning_rate": 1e-06, "loss": -0.002, "step": 226 }, { "clip_ratio/high_max": 0.00237767917860765, "clip_ratio/high_mean": 0.0008552605468139518, "clip_ratio/low_mean": 0.0007679162226850167, "clip_ratio/low_min": 2.181120180466678e-05, "clip_ratio/region_mean": 0.0016231767876888625, "epoch": 0.5296004666083406, "grad_norm": 0.13795067369937897, "learning_rate": 1e-06, "loss": -0.0021, "step": 227 }, { "clip_ratio/high_max": 0.0020998619875172153, "clip_ratio/high_mean": 0.000840004007841344, "clip_ratio/low_mean": 0.0009554424141242635, "clip_ratio/low_min": 3.271680179750547e-05, "clip_ratio/region_mean": 0.0017954464128706604, "epoch": 0.531933508311461, "grad_norm": 0.13355492055416107, "learning_rate": 1e-06, "loss": -0.0022, "step": 228 }, { "clip_ratio/high_max": 0.0019174024200765416, "clip_ratio/high_mean": 0.000839916778204497, "clip_ratio/low_mean": 0.0006783026192351826, "clip_ratio/low_min": 1.2462612176022958e-05, "clip_ratio/region_mean": 0.001518219432909973, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3926.0, "completions/mean_length": 737.0335083007812, "completions/mean_terminated_length": 624.6804809570312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.5342665500145816, "grad_norm": 0.16933506727218628, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 36057803.0, "reward": 0.59375, "reward_std": 0.19332514703273773, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 229 }, { "clip_ratio/high_max": 0.0023243574069056194, "clip_ratio/high_mean": 0.0009935677589965053, "clip_ratio/low_mean": 0.0007875768660596805, "clip_ratio/low_min": 2.8766482500941493e-05, "clip_ratio/region_mean": 0.0017811446450650692, "epoch": 0.536599591717702, "grad_norm": 0.15548740327358246, "learning_rate": 1e-06, "loss": 0.0053, "step": 230 }, { "clip_ratio/high_max": 0.0021758751972811297, "clip_ratio/high_mean": 0.0010291633534507127, "clip_ratio/low_mean": 0.0009182637631965918, "clip_ratio/low_min": 6.30091963103041e-05, "clip_ratio/region_mean": 0.001947427139384672, "epoch": 0.5389326334208224, "grad_norm": 0.1545095443725586, "learning_rate": 1e-06, "loss": 0.0053, "step": 231 }, { "clip_ratio/high_max": 0.0026413014420541003, "clip_ratio/high_mean": 0.0012076333332515787, "clip_ratio/low_mean": 0.0010082296557811787, "clip_ratio/low_min": 4.5635513743036427e-05, "clip_ratio/region_mean": 0.0022158630017656833, "epoch": 0.5412656751239429, "grad_norm": 0.17263031005859375, "learning_rate": 1e-06, "loss": 0.0051, "step": 232 }, { "clip_ratio/high_max": 0.0015943119542498607, "clip_ratio/high_mean": 0.0005446503100756672, "clip_ratio/low_mean": 0.0005840461008119746, "clip_ratio/low_min": 2.462892371113412e-05, "clip_ratio/region_mean": 0.001128696407249663, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 785.0279541015625, "completions/mean_terminated_length": 597.6143798828125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.5435987168270633, "grad_norm": 0.13965320587158203, "learning_rate": 1e-06, "loss": -0.0188, "num_tokens": 36658724.0, "reward": 0.6049107313156128, "reward_std": 0.12512750923633575, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914289474487305, "step": 233 }, { "clip_ratio/high_max": 0.0018843789403035771, "clip_ratio/high_mean": 0.0006698274164591567, "clip_ratio/low_mean": 0.0005825686821481213, "clip_ratio/low_min": 1.6419282474089414e-05, "clip_ratio/region_mean": 0.001252396101335762, "epoch": 0.5459317585301837, "grad_norm": 0.12933647632598877, "learning_rate": 1e-06, "loss": -0.019, "step": 234 }, { "clip_ratio/high_max": 0.0018202779319835827, "clip_ratio/high_mean": 0.000629704834864242, "clip_ratio/low_mean": 0.0007327547336899443, "clip_ratio/low_min": 2.9110386094544083e-05, "clip_ratio/region_mean": 0.0013624595776491333, "epoch": 0.5482648002333042, "grad_norm": 0.13066346943378448, "learning_rate": 1e-06, "loss": -0.019, "step": 235 }, { "clip_ratio/high_max": 0.0021653471412719227, "clip_ratio/high_mean": 0.0007388412832369795, "clip_ratio/low_mean": 0.0007796467016305542, "clip_ratio/low_min": 2.462892371113412e-05, "clip_ratio/region_mean": 0.0015184880212473217, "epoch": 0.5505978419364246, "grad_norm": 0.12001126259565353, "learning_rate": 1e-06, "loss": -0.0192, "step": 236 }, { "clip_ratio/high_max": 0.001686602507106727, "clip_ratio/high_mean": 0.0006264589737838833, "clip_ratio/low_mean": 0.0006217975833351375, "clip_ratio/low_min": 1.3727212717640214e-05, "clip_ratio/region_mean": 0.0012482565762184095, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3908.0, "completions/mean_length": 715.8035888671875, "completions/mean_terminated_length": 598.7066650390625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.552930883639545, "grad_norm": 0.15931841731071472, "learning_rate": 1e-06, "loss": -0.0172, "num_tokens": 37262212.0, "reward": 0.6283482313156128, "reward_std": 0.15819068253040314, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 237 }, { "clip_ratio/high_max": 0.0020671617185143987, "clip_ratio/high_mean": 0.0007625681610079482, "clip_ratio/low_mean": 0.0006590588873223169, "clip_ratio/low_min": 1.3727212717640214e-05, "clip_ratio/region_mean": 0.0014216270938050002, "epoch": 0.5552639253426656, "grad_norm": 0.15216955542564392, "learning_rate": 1e-06, "loss": -0.0174, "step": 238 }, { "clip_ratio/high_max": 0.002131102268322138, "clip_ratio/high_mean": 0.000800670655507929, "clip_ratio/low_mean": 0.000872808812346193, "clip_ratio/low_min": 2.7454425435280427e-05, "clip_ratio/region_mean": 0.0016734794371586759, "epoch": 0.557596967045786, "grad_norm": 0.1372472047805786, "learning_rate": 1e-06, "loss": -0.0175, "step": 239 }, { "clip_ratio/high_max": 0.0021186753547226544, "clip_ratio/high_mean": 0.0008160655922893056, "clip_ratio/low_mean": 0.0009736781685205642, "clip_ratio/low_min": 2.7454425435280427e-05, "clip_ratio/region_mean": 0.0017897437792271376, "epoch": 0.5599300087489064, "grad_norm": 0.1365393102169037, "learning_rate": 1e-06, "loss": -0.0176, "step": 240 }, { "clip_ratio/high_max": 0.0018094236802426167, "clip_ratio/high_mean": 0.0007016712606855435, "clip_ratio/low_mean": 0.0006190466701809783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00132071793632349, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 876.6752319335938, "completions/mean_terminated_length": 649.7454833984375, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 0.5622630504520268, "grad_norm": 0.1651824414730072, "learning_rate": 1e-06, "loss": -0.0264, "num_tokens": 37890073.0, "reward": 0.566964328289032, "reward_std": 0.18942096829414368, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 241 }, { "clip_ratio/high_max": 0.0021219863556325436, "clip_ratio/high_mean": 0.000814467275631614, "clip_ratio/low_mean": 0.000841845436298172, "clip_ratio/low_min": 2.9780812837998383e-05, "clip_ratio/region_mean": 0.001656312721024733, "epoch": 0.5645960921551473, "grad_norm": 0.15240824222564697, "learning_rate": 1e-06, "loss": -0.0265, "step": 242 }, { "clip_ratio/high_max": 0.0025818969515967183, "clip_ratio/high_mean": 0.0009484981455898378, "clip_ratio/low_mean": 0.0009522576983727049, "clip_ratio/low_min": 3.338675378472544e-05, "clip_ratio/region_mean": 0.0019007558221346699, "epoch": 0.5669291338582677, "grad_norm": 0.1433403193950653, "learning_rate": 1e-06, "loss": -0.0267, "step": 243 }, { "clip_ratio/high_max": 0.0023972711351234466, "clip_ratio/high_mean": 0.0008792300177447032, "clip_ratio/low_mean": 0.0010734403404057957, "clip_ratio/low_min": 2.9780812837998383e-05, "clip_ratio/region_mean": 0.0019526703545125201, "epoch": 0.5692621755613881, "grad_norm": 0.1361435204744339, "learning_rate": 1e-06, "loss": -0.0268, "step": 244 }, { "clip_ratio/high_max": 0.0023388081972370856, "clip_ratio/high_mean": 0.0010041116147476714, "clip_ratio/low_mean": 0.0005375806940719485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015416922833537683, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3722.0, "completions/mean_length": 852.7991333007812, "completions/mean_terminated_length": 673.2579956054688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.5715952172645086, "grad_norm": 0.17590917646884918, "learning_rate": 1e-06, "loss": -0.0309, "num_tokens": 38545637.0, "reward": 0.5535714626312256, "reward_std": 0.21320165693759918, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 245 }, { "clip_ratio/high_max": 0.0028434059422579594, "clip_ratio/high_mean": 0.0011221420245419722, "clip_ratio/low_mean": 0.0007665483663004125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018886903635575436, "epoch": 0.573928258967629, "grad_norm": 0.1562732458114624, "learning_rate": 1e-06, "loss": -0.031, "step": 246 }, { "clip_ratio/high_max": 0.002739102128543891, "clip_ratio/high_mean": 0.0011587176923057996, "clip_ratio/low_mean": 0.0008570920199417742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020158097104285844, "epoch": 0.5762613006707495, "grad_norm": 0.14969033002853394, "learning_rate": 1e-06, "loss": -0.0312, "step": 247 }, { "clip_ratio/high_max": 0.0028818644204875454, "clip_ratio/high_mean": 0.0011963675678998698, "clip_ratio/low_mean": 0.0009386686633661157, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021350362767407205, "epoch": 0.57859434237387, "grad_norm": 0.14072629809379578, "learning_rate": 1e-06, "loss": -0.0312, "step": 248 }, { "clip_ratio/high_max": 0.0022883624587848317, "clip_ratio/high_mean": 0.000831738530905568, "clip_ratio/low_mean": 0.0006430429693864426, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014747815221198834, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3828.0, "completions/mean_length": 826.6328735351562, "completions/mean_terminated_length": 637.495849609375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.5809273840769904, "grad_norm": 0.16936250030994415, "learning_rate": 1e-06, "loss": -0.016, "num_tokens": 39164252.0, "reward": 0.6049107313156128, "reward_std": 0.19201312959194183, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 249 }, { "clip_ratio/high_max": 0.0022695836960338056, "clip_ratio/high_mean": 0.0009161839934677118, "clip_ratio/low_mean": 0.0008726210053282557, "clip_ratio/low_min": 4.6423947424045764e-05, "clip_ratio/region_mean": 0.001788805009709904, "epoch": 0.5832604257801108, "grad_norm": 0.1576201617717743, "learning_rate": 1e-06, "loss": -0.0163, "step": 250 }, { "clip_ratio/high_max": 0.0022874393725942355, "clip_ratio/high_mean": 0.0009589367127773585, "clip_ratio/low_mean": 0.0009178719137707958, "clip_ratio/low_min": 3.161702989018522e-05, "clip_ratio/region_mean": 0.0018768086192721967, "epoch": 0.5855934674832313, "grad_norm": 0.1577378660440445, "learning_rate": 1e-06, "loss": -0.0163, "step": 251 }, { "clip_ratio/high_max": 0.002503670479200082, "clip_ratio/high_mean": 0.0010142002920474624, "clip_ratio/low_mean": 0.0011390060462872498, "clip_ratio/low_min": 5.3262696383171715e-05, "clip_ratio/region_mean": 0.0021532063183258288, "epoch": 0.5879265091863517, "grad_norm": 0.151458278298378, "learning_rate": 1e-06, "loss": -0.0165, "step": 252 }, { "clip_ratio/high_max": 0.002000388420128729, "clip_ratio/high_mean": 0.0008652892101963516, "clip_ratio/low_mean": 0.0007732047506578965, "clip_ratio/low_min": 3.3055265703296755e-05, "clip_ratio/region_mean": 0.0016384939517593011, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 862.818115234375, "completions/mean_terminated_length": 659.5457153320312, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.5902595508894721, "grad_norm": 0.1744660884141922, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 39810017.0, "reward": 0.6194196939468384, "reward_std": 0.2055729776620865, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 253 }, { "clip_ratio/high_max": 0.002561314577178564, "clip_ratio/high_mean": 0.0010541134088271065, "clip_ratio/low_mean": 0.0008829656744637759, "clip_ratio/low_min": 3.761826064874185e-05, "clip_ratio/region_mean": 0.0019370790469110943, "epoch": 0.5925925925925926, "grad_norm": 0.15844956040382385, "learning_rate": 1e-06, "loss": -0.0421, "step": 254 }, { "clip_ratio/high_max": 0.0023657003112020902, "clip_ratio/high_mean": 0.0009961791802197695, "clip_ratio/low_mean": 0.0010405282337160315, "clip_ratio/low_min": 2.41173074755352e-05, "clip_ratio/region_mean": 0.00203670741757378, "epoch": 0.594925634295713, "grad_norm": 0.16475479304790497, "learning_rate": 1e-06, "loss": -0.0422, "step": 255 }, { "clip_ratio/high_max": 0.002772774372715503, "clip_ratio/high_mean": 0.0011516747654241044, "clip_ratio/low_mean": 0.0011741312482627109, "clip_ratio/low_min": 9.542025873088278e-05, "clip_ratio/region_mean": 0.002325806053704582, "epoch": 0.5972586759988335, "grad_norm": 0.1471288800239563, "learning_rate": 1e-06, "loss": -0.0423, "step": 256 }, { "clip_ratio/high_max": 0.0020365179116197396, "clip_ratio/high_mean": 0.0007399433470709482, "clip_ratio/low_mean": 0.0007232173720694846, "clip_ratio/low_min": 7.34406912670238e-05, "clip_ratio/region_mean": 0.0014631607118644752, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 850.8594360351562, "completions/mean_terminated_length": 667.1721801757812, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.599591717701954, "grad_norm": 0.16450372338294983, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 40473459.0, "reward": 0.5625, "reward_std": 0.1769806295633316, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 257 }, { "clip_ratio/high_max": 0.0023768204264342785, "clip_ratio/high_mean": 0.0009120186532527441, "clip_ratio/low_mean": 0.0007551489397883415, "clip_ratio/low_min": 7.853170700400369e-05, "clip_ratio/region_mean": 0.0016671675984980538, "epoch": 0.6019247594050744, "grad_norm": 0.16053013503551483, "learning_rate": 1e-06, "loss": -0.0006, "step": 258 }, { "clip_ratio/high_max": 0.0023812524013919756, "clip_ratio/high_mean": 0.0008976756034826394, "clip_ratio/low_mean": 0.000971376386587508, "clip_ratio/low_min": 0.00013718111313210102, "clip_ratio/region_mean": 0.0018690520228119567, "epoch": 0.6042578011081948, "grad_norm": 0.144574835896492, "learning_rate": 1e-06, "loss": -0.0008, "step": 259 }, { "clip_ratio/high_max": 0.0025085772431339137, "clip_ratio/high_mean": 0.0009723260591272265, "clip_ratio/low_mean": 0.001042663978296332, "clip_ratio/low_min": 0.00010595327785267727, "clip_ratio/region_mean": 0.002014990030147601, "epoch": 0.6065908428113153, "grad_norm": 0.140875905752182, "learning_rate": 1e-06, "loss": -0.0009, "step": 260 }, { "clip_ratio/high_max": 0.0021445616221171804, "clip_ratio/high_mean": 0.0007776104284857865, "clip_ratio/low_mean": 0.0006363705933836172, "clip_ratio/low_min": 1.6112400771817192e-05, "clip_ratio/region_mean": 0.0014139809918560786, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2334.0, "completions/mean_length": 803.5502319335938, "completions/mean_terminated_length": 592.3955078125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.6089238845144357, "grad_norm": 0.165104478597641, "learning_rate": 1e-06, "loss": -0.0082, "num_tokens": 41070792.0, "reward": 0.5993303656578064, "reward_std": 0.1750248819589615, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 261 }, { "clip_ratio/high_max": 0.002551183872128604, "clip_ratio/high_mean": 0.0009278637880925089, "clip_ratio/low_mean": 0.0007502610906158225, "clip_ratio/low_min": 2.9634898965014145e-05, "clip_ratio/region_mean": 0.001678124852332985, "epoch": 0.6112569262175561, "grad_norm": 0.15698152780532837, "learning_rate": 1e-06, "loss": -0.0083, "step": 262 }, { "clip_ratio/high_max": 0.002213197723904159, "clip_ratio/high_mean": 0.0008760010368860094, "clip_ratio/low_mean": 0.0008873216993379174, "clip_ratio/low_min": 4.396025269670645e-05, "clip_ratio/region_mean": 0.0017633227616897784, "epoch": 0.6135899679206765, "grad_norm": 0.15538519620895386, "learning_rate": 1e-06, "loss": -0.0084, "step": 263 }, { "clip_ratio/high_max": 0.0026316181501897518, "clip_ratio/high_mean": 0.0009802830427361187, "clip_ratio/low_mean": 0.0010764177441160427, "clip_ratio/low_min": 0.00012923500253236853, "clip_ratio/region_mean": 0.002056700788671151, "epoch": 0.615923009623797, "grad_norm": 0.13597901165485382, "learning_rate": 1e-06, "loss": -0.0086, "step": 264 }, { "clip_ratio/high_max": 0.0017064792227756698, "clip_ratio/high_mean": 0.0007641955508006504, "clip_ratio/low_mean": 0.0008332567922479939, "clip_ratio/low_min": 9.998057976190466e-05, "clip_ratio/region_mean": 0.0015974523812474217, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3588.0, "completions/mean_length": 902.0670166015625, "completions/mean_terminated_length": 672.8372802734375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.6182560513269175, "grad_norm": 0.24107509851455688, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 41728956.0, "reward": 0.5345982313156128, "reward_std": 0.20415081083774567, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 265 }, { "clip_ratio/high_max": 0.002118409778631758, "clip_ratio/high_mean": 0.000828780184747302, "clip_ratio/low_mean": 0.0009132619670708664, "clip_ratio/low_min": 5.219691956881434e-05, "clip_ratio/region_mean": 0.0017420421427232213, "epoch": 0.620589093030038, "grad_norm": 0.3318217992782593, "learning_rate": 1e-06, "loss": 0.0018, "step": 266 }, { "clip_ratio/high_max": 0.0023685053674853407, "clip_ratio/high_mean": 0.0009485268165008165, "clip_ratio/low_mean": 0.0011321363817842212, "clip_ratio/low_min": 0.00018766624634736218, "clip_ratio/region_mean": 0.002080663209198974, "epoch": 0.6229221347331584, "grad_norm": 0.1554151475429535, "learning_rate": 1e-06, "loss": 0.0015, "step": 267 }, { "clip_ratio/high_max": 0.002183291820983868, "clip_ratio/high_mean": 0.0008823609368846519, "clip_ratio/low_mean": 0.001225516065460397, "clip_ratio/low_min": 0.00015836049806239316, "clip_ratio/region_mean": 0.0021078770005260594, "epoch": 0.6252551764362788, "grad_norm": 0.1492011845111847, "learning_rate": 1e-06, "loss": 0.0014, "step": 268 }, { "clip_ratio/high_max": 0.0019264101465523709, "clip_ratio/high_mean": 0.0007137561851777718, "clip_ratio/low_mean": 0.0006577908879989991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001371547085000202, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 878.0636596679688, "completions/mean_terminated_length": 663.5345458984375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.6275882181393992, "grad_norm": 0.16863220930099487, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 42369949.0, "reward": 0.5368303656578064, "reward_std": 0.16319186985492706, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 269 }, { "clip_ratio/high_max": 0.002367245891946368, "clip_ratio/high_mean": 0.0008438087133981753, "clip_ratio/low_mean": 0.0009099655726458877, "clip_ratio/low_min": 1.2755102034134325e-05, "clip_ratio/region_mean": 0.0017537742533022538, "epoch": 0.6299212598425197, "grad_norm": 0.1605025827884674, "learning_rate": 1e-06, "loss": -0.0057, "step": 270 }, { "clip_ratio/high_max": 0.0025126124292000895, "clip_ratio/high_mean": 0.0008548467694708961, "clip_ratio/low_mean": 0.0009638360970711801, "clip_ratio/low_min": 2.4831148039083928e-05, "clip_ratio/region_mean": 0.0018186828419857193, "epoch": 0.6322543015456401, "grad_norm": 0.1469624787569046, "learning_rate": 1e-06, "loss": -0.0059, "step": 271 }, { "clip_ratio/high_max": 0.002169972543924814, "clip_ratio/high_mean": 0.0008567410677642329, "clip_ratio/low_mean": 0.0011263936721661594, "clip_ratio/low_min": 3.6721503420267254e-05, "clip_ratio/region_mean": 0.001983134789043106, "epoch": 0.6345873432487605, "grad_norm": 0.15112292766571045, "learning_rate": 1e-06, "loss": -0.006, "step": 272 }, { "clip_ratio/high_max": 0.0025859194902295712, "clip_ratio/high_mean": 0.0009666358764661709, "clip_ratio/low_mean": 0.0005736249649999081, "clip_ratio/low_min": 3.656467561086174e-05, "clip_ratio/region_mean": 0.001540260826004669, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3591.0, "completions/mean_length": 952.0781860351562, "completions/mean_terminated_length": 697.9855346679688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.636920384951881, "grad_norm": 0.18112686276435852, "learning_rate": 1e-06, "loss": -0.0304, "num_tokens": 43038947.0, "reward": 0.590401828289032, "reward_std": 0.2192525565624237, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 273 }, { "clip_ratio/high_max": 0.0030897176038706675, "clip_ratio/high_mean": 0.0011518080391397234, "clip_ratio/low_mean": 0.0007482507317035925, "clip_ratio/low_min": 3.253998147556558e-05, "clip_ratio/region_mean": 0.0019000587417394854, "epoch": 0.6392534266550015, "grad_norm": 0.1668241173028946, "learning_rate": 1e-06, "loss": -0.0307, "step": 274 }, { "clip_ratio/high_max": 0.003141061511996668, "clip_ratio/high_mean": 0.001168641247204505, "clip_ratio/low_mean": 0.0008392504605581053, "clip_ratio/low_min": 5.431662066257559e-05, "clip_ratio/region_mean": 0.0020078917223145254, "epoch": 0.6415864683581219, "grad_norm": 0.1468450129032135, "learning_rate": 1e-06, "loss": -0.0309, "step": 275 }, { "clip_ratio/high_max": 0.003024582467332948, "clip_ratio/high_mean": 0.0011891605572600383, "clip_ratio/low_mean": 0.0009700555565359537, "clip_ratio/low_min": 8.138310477079358e-05, "clip_ratio/region_mean": 0.0021592160337604582, "epoch": 0.6439195100612424, "grad_norm": 0.15136562287807465, "learning_rate": 1e-06, "loss": -0.0309, "step": 276 }, { "clip_ratio/high_max": 0.002224248746642843, "clip_ratio/high_mean": 0.0009871662405203097, "clip_ratio/low_mean": 0.0007879494714870816, "clip_ratio/low_min": 7.20886037015589e-05, "clip_ratio/region_mean": 0.0017751157356542535, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3389.0, "completions/mean_length": 900.8906860351562, "completions/mean_terminated_length": 679.7494506835938, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.6462525517643628, "grad_norm": 0.2031947523355484, "learning_rate": 1e-06, "loss": -0.032, "num_tokens": 43698617.0, "reward": 0.5245535969734192, "reward_std": 0.2521325349807739, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 277 }, { "clip_ratio/high_max": 0.0023527197481598705, "clip_ratio/high_mean": 0.0010342140340071637, "clip_ratio/low_mean": 0.0010337477087887237, "clip_ratio/low_min": 0.00013410693372861715, "clip_ratio/region_mean": 0.002067961759166792, "epoch": 0.6485855934674832, "grad_norm": 0.17283844947814941, "learning_rate": 1e-06, "loss": -0.0322, "step": 278 }, { "clip_ratio/high_max": 0.0027428499452071264, "clip_ratio/high_mean": 0.0011379667957953643, "clip_ratio/low_mean": 0.0011746861309802625, "clip_ratio/low_min": 0.0001013571054500062, "clip_ratio/region_mean": 0.0023126528758439235, "epoch": 0.6509186351706037, "grad_norm": 0.18353228271007538, "learning_rate": 1e-06, "loss": -0.0324, "step": 279 }, { "clip_ratio/high_max": 0.002383102248131763, "clip_ratio/high_mean": 0.0011230513810005505, "clip_ratio/low_mean": 0.0013056921379757114, "clip_ratio/low_min": 0.00011297159107925836, "clip_ratio/region_mean": 0.002428743500786368, "epoch": 0.6532516768737241, "grad_norm": 0.16989165544509888, "learning_rate": 1e-06, "loss": -0.0325, "step": 280 }, { "clip_ratio/high_max": 0.0018031610161415301, "clip_ratio/high_mean": 0.0007382066487480188, "clip_ratio/low_mean": 0.0006247957589948783, "clip_ratio/low_min": 3.405358438612893e-05, "clip_ratio/region_mean": 0.0013630023677251302, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3357.0, "completions/mean_length": 908.3125610351562, "completions/mean_terminated_length": 638.1694946289062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.6555847185768445, "grad_norm": 0.1712566614151001, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 44325329.0, "reward": 0.559151828289032, "reward_std": 0.17006804049015045, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 281 }, { "clip_ratio/high_max": 0.0019251347512181383, "clip_ratio/high_mean": 0.0007723017515672836, "clip_ratio/low_mean": 0.0007710827940172749, "clip_ratio/low_min": 3.71518744941568e-05, "clip_ratio/region_mean": 0.0015433845364896115, "epoch": 0.657917760279965, "grad_norm": 0.19617384672164917, "learning_rate": 1e-06, "loss": -0.0071, "step": 282 }, { "clip_ratio/high_max": 0.0022511800489155576, "clip_ratio/high_mean": 0.0009322707974206423, "clip_ratio/low_mean": 0.0009935805755958427, "clip_ratio/low_min": 7.676373752474319e-05, "clip_ratio/region_mean": 0.0019258513602835592, "epoch": 0.6602508019830855, "grad_norm": 0.1522125005722046, "learning_rate": 1e-06, "loss": -0.0074, "step": 283 }, { "clip_ratio/high_max": 0.002197074478317518, "clip_ratio/high_mean": 0.0008453116188320564, "clip_ratio/low_mean": 0.001044545915647177, "clip_ratio/low_min": 0.0001263392605324043, "clip_ratio/region_mean": 0.0018898575071943924, "epoch": 0.6625838436862059, "grad_norm": 0.1423969864845276, "learning_rate": 1e-06, "loss": -0.0074, "step": 284 }, { "clip_ratio/high_max": 0.002228131423180457, "clip_ratio/high_mean": 0.0009918026426021243, "clip_ratio/low_mean": 0.0007022289737506071, "clip_ratio/low_min": 2.9418007216008846e-05, "clip_ratio/region_mean": 0.0016940316490945406, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 859.294677734375, "completions/mean_terminated_length": 659.8768310546875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.6649168853893264, "grad_norm": 0.1992596983909607, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 44979705.0, "reward": 0.5446428656578064, "reward_std": 0.22022753953933716, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 285 }, { "clip_ratio/high_max": 0.0023874518228694797, "clip_ratio/high_mean": 0.0010613761533022625, "clip_ratio/low_mean": 0.000938201406825101, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019995775655843318, "epoch": 0.6672499270924468, "grad_norm": 0.17571620643138885, "learning_rate": 1e-06, "loss": -0.0102, "step": 286 }, { "clip_ratio/high_max": 0.0024007014435483143, "clip_ratio/high_mean": 0.00111949186612037, "clip_ratio/low_mean": 0.001057549030520022, "clip_ratio/low_min": 5.883601443201769e-05, "clip_ratio/region_mean": 0.00217704084207071, "epoch": 0.6695829687955672, "grad_norm": 0.19940689206123352, "learning_rate": 1e-06, "loss": -0.0103, "step": 287 }, { "clip_ratio/high_max": 0.002804509065754246, "clip_ratio/high_mean": 0.0011859204387292266, "clip_ratio/low_mean": 0.0012177887711004587, "clip_ratio/low_min": 5.883601443201769e-05, "clip_ratio/region_mean": 0.002403709186182823, "epoch": 0.6719160104986877, "grad_norm": 0.15958590805530548, "learning_rate": 1e-06, "loss": -0.0105, "step": 288 }, { "clip_ratio/high_max": 0.002107639877067413, "clip_ratio/high_mean": 0.0007302628273464506, "clip_ratio/low_mean": 0.0005998617989462218, "clip_ratio/low_min": 6.199027302500326e-05, "clip_ratio/region_mean": 0.0013301246253831778, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3905.0, "completions/mean_length": 931.5000610351562, "completions/mean_terminated_length": 663.3220825195312, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.6742490522018081, "grad_norm": 0.16017627716064453, "learning_rate": 1e-06, "loss": -0.0258, "num_tokens": 45624385.0, "reward": 0.559151828289032, "reward_std": 0.15571725368499756, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 289 }, { "clip_ratio/high_max": 0.00211996757570887, "clip_ratio/high_mean": 0.000799043718870962, "clip_ratio/low_mean": 0.0006761148606528877, "clip_ratio/low_min": 3.019688301719725e-05, "clip_ratio/region_mean": 0.0014751585840713233, "epoch": 0.6765820939049285, "grad_norm": 0.18126334249973297, "learning_rate": 1e-06, "loss": -0.026, "step": 290 }, { "clip_ratio/high_max": 0.0024122541180986445, "clip_ratio/high_mean": 0.00091742939366668, "clip_ratio/low_mean": 0.0008994874424388399, "clip_ratio/low_min": 6.199027302500326e-05, "clip_ratio/region_mean": 0.0018169168106396683, "epoch": 0.678915135608049, "grad_norm": 0.14620453119277954, "learning_rate": 1e-06, "loss": -0.0261, "step": 291 }, { "clip_ratio/high_max": 0.0027023746479244437, "clip_ratio/high_mean": 0.0009316678697359748, "clip_ratio/low_mean": 0.0009326811541541247, "clip_ratio/low_min": 1.0813148946908768e-05, "clip_ratio/region_mean": 0.0018643490402610041, "epoch": 0.6812481773111695, "grad_norm": 0.12853315472602844, "learning_rate": 1e-06, "loss": -0.0262, "step": 292 }, { "clip_ratio/high_max": 0.002244984279968776, "clip_ratio/high_mean": 0.0007957256875670282, "clip_ratio/low_mean": 0.0006222014708328061, "clip_ratio/low_min": 4.063127198605798e-05, "clip_ratio/region_mean": 0.001417927174770739, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3306.0, "completions/mean_length": 963.935302734375, "completions/mean_terminated_length": 686.1215209960938, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.6835812190142899, "grad_norm": 0.1880384236574173, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 46278767.0, "reward": 0.5703125, "reward_std": 0.1770900934934616, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 293 }, { "clip_ratio/high_max": 0.002521429523767438, "clip_ratio/high_mean": 0.00093118618860899, "clip_ratio/low_mean": 0.0007033641941234237, "clip_ratio/low_min": 6.987263077462558e-05, "clip_ratio/region_mean": 0.0016345503827324137, "epoch": 0.6859142607174104, "grad_norm": 0.1694038212299347, "learning_rate": 1e-06, "loss": -0.0525, "step": 294 }, { "clip_ratio/high_max": 0.0027518795468495227, "clip_ratio/high_mean": 0.0010234643996227533, "clip_ratio/low_mean": 0.0008573010381951462, "clip_ratio/low_min": 0.00011226113565498963, "clip_ratio/region_mean": 0.0018807654705597088, "epoch": 0.6882473024205308, "grad_norm": 0.15922002494335175, "learning_rate": 1e-06, "loss": -0.0526, "step": 295 }, { "clip_ratio/high_max": 0.0026839955244213343, "clip_ratio/high_mean": 0.0009785380279936362, "clip_ratio/low_mean": 0.0009404030770383542, "clip_ratio/low_min": 7.932626431284007e-05, "clip_ratio/region_mean": 0.0019189410959370434, "epoch": 0.6905803441236512, "grad_norm": 0.15283632278442383, "learning_rate": 1e-06, "loss": -0.0527, "step": 296 }, { "clip_ratio/high_max": 0.0014327054050227161, "clip_ratio/high_mean": 0.0005220494294917444, "clip_ratio/low_mean": 0.00042884425738520804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009508936745987739, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 938.2734985351562, "completions/mean_terminated_length": 695.3713989257812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.6929133858267716, "grad_norm": 0.17274463176727295, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 46942380.0, "reward": 0.5167410969734192, "reward_std": 0.14252512156963348, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 297 }, { "clip_ratio/high_max": 0.0018583712408144493, "clip_ratio/high_mean": 0.0006815474316681502, "clip_ratio/low_mean": 0.0006500353038063622, "clip_ratio/low_min": 8.836420420266222e-06, "clip_ratio/region_mean": 0.0013315827563928906, "epoch": 0.6952464275298921, "grad_norm": 0.22150209546089172, "learning_rate": 1e-06, "loss": -0.0061, "step": 298 }, { "clip_ratio/high_max": 0.002125693663401762, "clip_ratio/high_mean": 0.0007630191430507693, "clip_ratio/low_mean": 0.0007003955361142289, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001463414646423189, "epoch": 0.6975794692330125, "grad_norm": 0.26326972246170044, "learning_rate": 1e-06, "loss": -0.0062, "step": 299 }, { "clip_ratio/high_max": 0.0020103206734347623, "clip_ratio/high_mean": 0.0007139106846807408, "clip_ratio/low_mean": 0.0008527872341801412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001566697916132398, "epoch": 0.6999125109361329, "grad_norm": 0.1435539722442627, "learning_rate": 1e-06, "loss": -0.0063, "step": 300 }, { "clip_ratio/high_max": 0.0019260439075878821, "clip_ratio/high_mean": 0.0007802204017934855, "clip_ratio/low_mean": 0.0006665019591309829, "clip_ratio/low_min": 3.7309739127522334e-05, "clip_ratio/region_mean": 0.0014467223882093094, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 972.4910888671875, "completions/mean_terminated_length": 720.0482788085938, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.7022455526392535, "grad_norm": 0.21303707361221313, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 47632540.0, "reward": 0.5792410969734192, "reward_std": 0.19498330354690552, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 301 }, { "clip_ratio/high_max": 0.002353203482925892, "clip_ratio/high_mean": 0.0009827109606703743, "clip_ratio/low_mean": 0.0007979882047948195, "clip_ratio/low_min": 4.723689471575199e-05, "clip_ratio/region_mean": 0.0017806991309043951, "epoch": 0.7045785943423739, "grad_norm": 0.16463270783424377, "learning_rate": 1e-06, "loss": -0.0264, "step": 302 }, { "clip_ratio/high_max": 0.0023819890775484964, "clip_ratio/high_mean": 0.0010660778134479187, "clip_ratio/low_mean": 0.0009695578792161541, "clip_ratio/low_min": 5.338591836334672e-05, "clip_ratio/region_mean": 0.002035635683569126, "epoch": 0.7069116360454943, "grad_norm": 0.16458259522914886, "learning_rate": 1e-06, "loss": -0.0266, "step": 303 }, { "clip_ratio/high_max": 0.002431227323540952, "clip_ratio/high_mean": 0.001081175225408515, "clip_ratio/low_mean": 0.0011479896675155032, "clip_ratio/low_min": 6.402207600331167e-05, "clip_ratio/region_mean": 0.0022291648710961454, "epoch": 0.7092446777486148, "grad_norm": 0.15164686739444733, "learning_rate": 1e-06, "loss": -0.0267, "step": 304 }, { "clip_ratio/high_max": 0.0017704722667986061, "clip_ratio/high_mean": 0.0007542049534094986, "clip_ratio/low_mean": 0.0007785144589433912, "clip_ratio/low_min": 6.762955126760062e-05, "clip_ratio/region_mean": 0.001532719390525017, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 1004.86279296875, "completions/mean_terminated_length": 663.9566650390625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.7115777194517352, "grad_norm": 0.19585666060447693, "learning_rate": 1e-06, "loss": -0.0405, "num_tokens": 48259977.0, "reward": 0.5680803656578064, "reward_std": 0.2101946473121643, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 305 }, { "clip_ratio/high_max": 0.002480101305991411, "clip_ratio/high_mean": 0.0010079666135425214, "clip_ratio/low_mean": 0.0008802175470918883, "clip_ratio/low_min": 0.00010291668877471238, "clip_ratio/region_mean": 0.0018881841751863249, "epoch": 0.7139107611548556, "grad_norm": 0.17813493311405182, "learning_rate": 1e-06, "loss": -0.0407, "step": 306 }, { "clip_ratio/high_max": 0.002498487723642029, "clip_ratio/high_mean": 0.0010281934191880282, "clip_ratio/low_mean": 0.0011755823288694955, "clip_ratio/low_min": 0.00013967133600090165, "clip_ratio/region_mean": 0.0022037758099031635, "epoch": 0.7162438028579761, "grad_norm": 0.16861718893051147, "learning_rate": 1e-06, "loss": -0.0409, "step": 307 }, { "clip_ratio/high_max": 0.0026571681009954773, "clip_ratio/high_mean": 0.0010816050453286152, "clip_ratio/low_mean": 0.0012901842310384382, "clip_ratio/low_min": 0.00017554063742863946, "clip_ratio/region_mean": 0.00237178922543535, "epoch": 0.7185768445610965, "grad_norm": 0.15887220203876495, "learning_rate": 1e-06, "loss": -0.041, "step": 308 }, { "clip_ratio/high_max": 0.0018626099736138713, "clip_ratio/high_mean": 0.0006524443851958495, "clip_ratio/low_mean": 0.000669809989631176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013222543930169195, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3347.0, "completions/mean_length": 931.22998046875, "completions/mean_terminated_length": 646.3236083984375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.7209098862642169, "grad_norm": 0.16430722177028656, "learning_rate": 1e-06, "loss": -0.0194, "num_tokens": 48877167.0, "reward": 0.6205357313156128, "reward_std": 0.15665017068386078, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 309 }, { "clip_ratio/high_max": 0.0019592823373386636, "clip_ratio/high_mean": 0.0007627188206242863, "clip_ratio/low_mean": 0.0007951916195452213, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015579104656353593, "epoch": 0.7232429279673375, "grad_norm": 0.15908190608024597, "learning_rate": 1e-06, "loss": -0.0195, "step": 310 }, { "clip_ratio/high_max": 0.0020298409144743346, "clip_ratio/high_mean": 0.0007692790331930155, "clip_ratio/low_mean": 0.0009578395511198323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017271185934077948, "epoch": 0.7255759696704579, "grad_norm": 0.1455083042383194, "learning_rate": 1e-06, "loss": -0.0197, "step": 311 }, { "clip_ratio/high_max": 0.0020402515583555214, "clip_ratio/high_mean": 0.0008099767055682605, "clip_ratio/low_mean": 0.0010112691415997688, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018212458307971247, "epoch": 0.7279090113735783, "grad_norm": 0.14736001193523407, "learning_rate": 1e-06, "loss": -0.0197, "step": 312 }, { "clip_ratio/high_max": 0.002634305026731454, "clip_ratio/high_mean": 0.0009955086425179616, "clip_ratio/low_mean": 0.000669814377943112, "clip_ratio/low_min": 8.006661118997727e-06, "clip_ratio/region_mean": 0.0016653229904477485, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2359.0, "completions/mean_length": 852.3381958007812, "completions/mean_terminated_length": 619.5394287109375, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.7302420530766988, "grad_norm": 0.23050937056541443, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 49484190.0, "reward": 0.574776828289032, "reward_std": 0.20958594977855682, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 313 }, { "clip_ratio/high_max": 0.0025715668743941933, "clip_ratio/high_mean": 0.0010657229249773081, "clip_ratio/low_mean": 0.0009035015518747969, "clip_ratio/low_min": 1.6013322237995453e-05, "clip_ratio/region_mean": 0.001969224525964819, "epoch": 0.7325750947798192, "grad_norm": 0.19144074618816376, "learning_rate": 1e-06, "loss": -0.0333, "step": 314 }, { "clip_ratio/high_max": 0.003424244081543293, "clip_ratio/high_mean": 0.0011974710741924355, "clip_ratio/low_mean": 0.0010006037591665518, "clip_ratio/low_min": 1.6013322237995453e-05, "clip_ratio/region_mean": 0.002198074827902019, "epoch": 0.7349081364829396, "grad_norm": 0.18723425269126892, "learning_rate": 1e-06, "loss": -0.0334, "step": 315 }, { "clip_ratio/high_max": 0.0029578319081338122, "clip_ratio/high_mean": 0.001201204850076465, "clip_ratio/low_mean": 0.0012593220544658834, "clip_ratio/low_min": 1.6013322237995453e-05, "clip_ratio/region_mean": 0.002460526942741126, "epoch": 0.73724117818606, "grad_norm": 0.1673799604177475, "learning_rate": 1e-06, "loss": -0.0335, "step": 316 }, { "clip_ratio/high_max": 0.002176768146455288, "clip_ratio/high_mean": 0.0009286732965847477, "clip_ratio/low_mean": 0.0005328897141225752, "clip_ratio/low_min": 1.14009490062017e-05, "clip_ratio/region_mean": 0.0014615630389016587, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 896.6998291015625, "completions/mean_terminated_length": 591.632080078125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.7395742198891805, "grad_norm": 0.22170208394527435, "learning_rate": 1e-06, "loss": -0.0245, "num_tokens": 50058193.0, "reward": 0.59375, "reward_std": 0.1812213957309723, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 317 }, { "clip_ratio/high_max": 0.001941384223755449, "clip_ratio/high_mean": 0.0009143285442405613, "clip_ratio/low_mean": 0.0007849243647797266, "clip_ratio/low_min": 1.9765970137086697e-05, "clip_ratio/region_mean": 0.0016992528835544363, "epoch": 0.7419072615923009, "grad_norm": 0.17754168808460236, "learning_rate": 1e-06, "loss": -0.0247, "step": 318 }, { "clip_ratio/high_max": 0.0025080476843868382, "clip_ratio/high_mean": 0.0011364773199602496, "clip_ratio/low_mean": 0.0009200657805195078, "clip_ratio/low_min": 9.882985068543348e-06, "clip_ratio/region_mean": 0.002056543147773482, "epoch": 0.7442403032954215, "grad_norm": 0.1744786649942398, "learning_rate": 1e-06, "loss": -0.0249, "step": 319 }, { "clip_ratio/high_max": 0.002459716903103981, "clip_ratio/high_mean": 0.0010940259890048765, "clip_ratio/low_mean": 0.0011155503252666676, "clip_ratio/low_min": 9.882985068543348e-06, "clip_ratio/region_mean": 0.00220957632700447, "epoch": 0.7465733449985419, "grad_norm": 0.2607669234275818, "learning_rate": 1e-06, "loss": -0.025, "step": 320 }, { "clip_ratio/high_max": 0.002561289831646718, "clip_ratio/high_mean": 0.0009904731014103163, "clip_ratio/low_mean": 0.0007045711881801253, "clip_ratio/low_min": 2.972881520690862e-05, "clip_ratio/region_mean": 0.001695044309599325, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3718.0, "completions/mean_length": 991.2388916015625, "completions/mean_terminated_length": 703.48046875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.7489063867016623, "grad_norm": 0.1997159719467163, "learning_rate": 1e-06, "loss": -0.0189, "num_tokens": 50730863.0, "reward": 0.5647321939468384, "reward_std": 0.2045545130968094, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 321 }, { "clip_ratio/high_max": 0.0024462696674163453, "clip_ratio/high_mean": 0.0010274357173329918, "clip_ratio/low_mean": 0.0008851840229908703, "clip_ratio/low_min": 1.906359648273792e-05, "clip_ratio/region_mean": 0.0019126197148580104, "epoch": 0.7512394284047827, "grad_norm": 0.16949966549873352, "learning_rate": 1e-06, "loss": -0.0191, "step": 322 }, { "clip_ratio/high_max": 0.0024892661458579823, "clip_ratio/high_mean": 0.001082087605027482, "clip_ratio/low_mean": 0.0010396083653176902, "clip_ratio/low_min": 3.241152626287658e-05, "clip_ratio/region_mean": 0.0021216960121819284, "epoch": 0.7535724701079032, "grad_norm": 0.1658371537923813, "learning_rate": 1e-06, "loss": -0.0192, "step": 323 }, { "clip_ratio/high_max": 0.002635364216985181, "clip_ratio/high_mean": 0.0011081993725383654, "clip_ratio/low_mean": 0.0011569410180527484, "clip_ratio/low_min": 7.293935959751252e-05, "clip_ratio/region_mean": 0.002265140319650527, "epoch": 0.7559055118110236, "grad_norm": 0.1584588587284088, "learning_rate": 1e-06, "loss": -0.0193, "step": 324 }, { "clip_ratio/high_max": 0.002286330869537778, "clip_ratio/high_mean": 0.0009643776393204462, "clip_ratio/low_mean": 0.0006063846167307929, "clip_ratio/low_min": 3.167227168887621e-05, "clip_ratio/region_mean": 0.0015707622515037656, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3667.0, "completions/mean_length": 862.4330444335938, "completions/mean_terminated_length": 634.4993896484375, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.758238553514144, "grad_norm": 0.20087257027626038, "learning_rate": 1e-06, "loss": -0.0331, "num_tokens": 51356107.0, "reward": 0.6149553656578064, "reward_std": 0.17528954148292542, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 325 }, { "clip_ratio/high_max": 0.002545041650591884, "clip_ratio/high_mean": 0.0010686774730856996, "clip_ratio/low_mean": 0.0007562796445199638, "clip_ratio/low_min": 4.227381396049168e-05, "clip_ratio/region_mean": 0.001824957100325264, "epoch": 0.7605715952172645, "grad_norm": 0.17757172882556915, "learning_rate": 1e-06, "loss": -0.0334, "step": 326 }, { "clip_ratio/high_max": 0.0027258722184342332, "clip_ratio/high_mean": 0.0011290064721833915, "clip_ratio/low_mean": 0.0008904586029530037, "clip_ratio/low_min": 9.828733345784713e-05, "clip_ratio/region_mean": 0.002019465075136395, "epoch": 0.7629046369203849, "grad_norm": 0.16543947160243988, "learning_rate": 1e-06, "loss": -0.0335, "step": 327 }, { "clip_ratio/high_max": 0.0029484552069334313, "clip_ratio/high_mean": 0.0011796745820902288, "clip_ratio/low_mean": 0.0010999626847478794, "clip_ratio/low_min": 0.00010217612725682557, "clip_ratio/region_mean": 0.002279637243191246, "epoch": 0.7652376786235054, "grad_norm": 0.15951386094093323, "learning_rate": 1e-06, "loss": -0.0336, "step": 328 }, { "clip_ratio/high_max": 0.001977269319468178, "clip_ratio/high_mean": 0.000796055763203185, "clip_ratio/low_mean": 0.0004337097625466413, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012297655521251727, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3464.0, "completions/mean_length": 955.6920166015625, "completions/mean_terminated_length": 677.1470336914062, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.7675707203266259, "grad_norm": 0.1743716299533844, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 51998615.0, "reward": 0.6205357313156128, "reward_std": 0.15744292736053467, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 329 }, { "clip_ratio/high_max": 0.0020376929751364514, "clip_ratio/high_mean": 0.0008635195044917054, "clip_ratio/low_mean": 0.0004995460312784417, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013630655776069034, "epoch": 0.7699037620297463, "grad_norm": 0.16105009615421295, "learning_rate": 1e-06, "loss": -0.0296, "step": 330 }, { "clip_ratio/high_max": 0.0020978100146749057, "clip_ratio/high_mean": 0.0009213344874297036, "clip_ratio/low_mean": 0.0006812520859966753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016025865443225484, "epoch": 0.7722368037328667, "grad_norm": 0.15556097030639648, "learning_rate": 1e-06, "loss": -0.0297, "step": 331 }, { "clip_ratio/high_max": 0.002186413199524395, "clip_ratio/high_mean": 0.0009034329323185375, "clip_ratio/low_mean": 0.0007822174156899564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016856503389135469, "epoch": 0.7745698454359872, "grad_norm": 0.14274178445339203, "learning_rate": 1e-06, "loss": -0.0299, "step": 332 }, { "clip_ratio/high_max": 0.00177364839692018, "clip_ratio/high_mean": 0.0006682924713459215, "clip_ratio/low_mean": 0.0005507686728378758, "clip_ratio/low_min": 2.349624082853552e-05, "clip_ratio/region_mean": 0.0012190611560072284, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 1081.1763916015625, "completions/mean_terminated_length": 676.6557006835938, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.7769028871391076, "grad_norm": 0.24274273216724396, "learning_rate": 1e-06, "loss": -0.0192, "num_tokens": 52628789.0, "reward": 0.5234375, "reward_std": 0.15161733329296112, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 333 }, { "clip_ratio/high_max": 0.0021593021883745678, "clip_ratio/high_mean": 0.0008340619278897066, "clip_ratio/low_mean": 0.0006656840587311308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014997460020822473, "epoch": 0.779235928842228, "grad_norm": 0.172617107629776, "learning_rate": 1e-06, "loss": -0.0193, "step": 334 }, { "clip_ratio/high_max": 0.0022033443456166424, "clip_ratio/high_mean": 0.0008907091087166918, "clip_ratio/low_mean": 0.0008204151599784382, "clip_ratio/low_min": 4.699248165707104e-05, "clip_ratio/region_mean": 0.0017111242559622042, "epoch": 0.7815689705453485, "grad_norm": 0.149750754237175, "learning_rate": 1e-06, "loss": -0.0195, "step": 335 }, { "clip_ratio/high_max": 0.002309989075001795, "clip_ratio/high_mean": 0.0008876610172592336, "clip_ratio/low_mean": 0.0010042150188382948, "clip_ratio/low_min": 6.596306047867984e-05, "clip_ratio/region_mean": 0.00189187606156338, "epoch": 0.7839020122484689, "grad_norm": 0.17050692439079285, "learning_rate": 1e-06, "loss": -0.0196, "step": 336 }, { "clip_ratio/high_max": 0.0019952177863160614, "clip_ratio/high_mean": 0.0007473286750609986, "clip_ratio/low_mean": 0.0004356662479949591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011829949216917157, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 960.30810546875, "completions/mean_terminated_length": 635.9260864257812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.7862350539515894, "grad_norm": 0.18488436937332153, "learning_rate": 1e-06, "loss": -0.0318, "num_tokens": 53234889.0, "reward": 0.5803571939468384, "reward_std": 0.16262872517108917, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 337 }, { "clip_ratio/high_max": 0.0023194205859908834, "clip_ratio/high_mean": 0.000905933326066588, "clip_ratio/low_mean": 0.000658723060041666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015646564206690527, "epoch": 0.7885680956547099, "grad_norm": 0.15874096751213074, "learning_rate": 1e-06, "loss": -0.032, "step": 338 }, { "clip_ratio/high_max": 0.0021224655574769713, "clip_ratio/high_mean": 0.000954909894062439, "clip_ratio/low_mean": 0.0007223011557471182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016772110539022833, "epoch": 0.7909011373578303, "grad_norm": 0.1587703675031662, "learning_rate": 1e-06, "loss": -0.0321, "step": 339 }, { "clip_ratio/high_max": 0.0023340300176641904, "clip_ratio/high_mean": 0.000885969875525916, "clip_ratio/low_mean": 0.0008787779897829751, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017647478598519228, "epoch": 0.7932341790609507, "grad_norm": 0.15086764097213745, "learning_rate": 1e-06, "loss": -0.0322, "step": 340 }, { "clip_ratio/high_max": 0.001937615948918392, "clip_ratio/high_mean": 0.0006910750171300606, "clip_ratio/low_mean": 0.0005697111000699806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012607861172000412, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2683.0, "completions/mean_length": 890.7567138671875, "completions/mean_terminated_length": 652.4772338867188, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.7955672207640712, "grad_norm": 0.17974796891212463, "learning_rate": 1e-06, "loss": -0.0277, "num_tokens": 53865919.0, "reward": 0.598214328289032, "reward_std": 0.15518732368946075, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 341 }, { "clip_ratio/high_max": 0.0018351282851654105, "clip_ratio/high_mean": 0.0007653152588318335, "clip_ratio/low_mean": 0.0006482064327428816, "clip_ratio/low_min": 1.0539629329286981e-05, "clip_ratio/region_mean": 0.0014135217061266303, "epoch": 0.7979002624671916, "grad_norm": 0.20091377198696136, "learning_rate": 1e-06, "loss": -0.0278, "step": 342 }, { "clip_ratio/high_max": 0.0022948616824578494, "clip_ratio/high_mean": 0.0009078027542273048, "clip_ratio/low_mean": 0.0007261146765813464, "clip_ratio/low_min": 1.9089799025096e-05, "clip_ratio/region_mean": 0.001633917425351683, "epoch": 0.800233304170312, "grad_norm": 7.059561729431152, "learning_rate": 1e-06, "loss": -0.0277, "step": 343 }, { "clip_ratio/high_max": 0.001991556684515672, "clip_ratio/high_mean": 0.0007547452023572987, "clip_ratio/low_mean": 0.0008247352780017536, "clip_ratio/low_min": 9.544899512548e-06, "clip_ratio/region_mean": 0.0015794804639881477, "epoch": 0.8025663458734325, "grad_norm": 0.1414581835269928, "learning_rate": 1e-06, "loss": -0.028, "step": 344 }, { "clip_ratio/high_max": 0.0020483633488765918, "clip_ratio/high_mean": 0.0008616792129032547, "clip_ratio/low_mean": 0.0006169624598442169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014786416249989998, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3378.0, "completions/mean_length": 905.8594360351562, "completions/mean_terminated_length": 643.8671264648438, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.8048993875765529, "grad_norm": 0.206162691116333, "learning_rate": 1e-06, "loss": -0.0325, "num_tokens": 54479705.0, "reward": 0.5881696939468384, "reward_std": 0.17607727646827698, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 345 }, { "clip_ratio/high_max": 0.0022860596800455824, "clip_ratio/high_mean": 0.0009053035664692288, "clip_ratio/low_mean": 0.0008204468977055512, "clip_ratio/low_min": 1.3351847883313894e-05, "clip_ratio/region_mean": 0.0017257504659937695, "epoch": 0.8072324292796734, "grad_norm": 0.1690080612897873, "learning_rate": 1e-06, "loss": -0.0327, "step": 346 }, { "clip_ratio/high_max": 0.002454709989251569, "clip_ratio/high_mean": 0.0010995961711159907, "clip_ratio/low_mean": 0.0009662806096457643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002065876717097126, "epoch": 0.8095654709827939, "grad_norm": 0.16445612907409668, "learning_rate": 1e-06, "loss": -0.0329, "step": 347 }, { "clip_ratio/high_max": 0.002345841421629302, "clip_ratio/high_mean": 0.0010542023828747915, "clip_ratio/low_mean": 0.001126510302128736, "clip_ratio/low_min": 1.3351847883313894e-05, "clip_ratio/region_mean": 0.0021807126322528347, "epoch": 0.8118985126859143, "grad_norm": 0.15766270458698273, "learning_rate": 1e-06, "loss": -0.0329, "step": 348 }, { "clip_ratio/high_max": 0.002406015759333968, "clip_ratio/high_mean": 0.000929604284465313, "clip_ratio/low_mean": 0.0005244839403530932, "clip_ratio/low_min": 1.4178765923134051e-05, "clip_ratio/region_mean": 0.0014540881966240704, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 920.1563110351562, "completions/mean_terminated_length": 634.2530517578125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.8142315543890347, "grad_norm": 0.24384601414203644, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 55081133.0, "reward": 0.6272321939468384, "reward_std": 0.18013553321361542, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 349 }, { "clip_ratio/high_max": 0.00328760122647509, "clip_ratio/high_mean": 0.0012299432455620263, "clip_ratio/low_mean": 0.0006980443831707817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019279875996289775, "epoch": 0.8165645960921551, "grad_norm": 0.20077718794345856, "learning_rate": 1e-06, "loss": -0.0167, "step": 350 }, { "clip_ratio/high_max": 0.002643643179908395, "clip_ratio/high_mean": 0.001098320553865051, "clip_ratio/low_mean": 0.0009300855526817031, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002028406066528987, "epoch": 0.8188976377952756, "grad_norm": 0.15541011095046997, "learning_rate": 1e-06, "loss": -0.0168, "step": 351 }, { "clip_ratio/high_max": 0.0033104520916822366, "clip_ratio/high_mean": 0.0011763924812839832, "clip_ratio/low_mean": 0.0011006726108462317, "clip_ratio/low_min": 1.2042389244015794e-05, "clip_ratio/region_mean": 0.002277065061207395, "epoch": 0.821230679498396, "grad_norm": 0.1656966656446457, "learning_rate": 1e-06, "loss": -0.0169, "step": 352 }, { "clip_ratio/high_max": 0.00199166498350678, "clip_ratio/high_mean": 0.0008080843690549955, "clip_ratio/low_mean": 0.0005684048828697996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001376489242829848, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 1148.37841796875, "completions/mean_terminated_length": 710.0140991210938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.8235637212015164, "grad_norm": 0.2022244781255722, "learning_rate": 1e-06, "loss": -0.0536, "num_tokens": 55736792.0, "reward": 0.5267857313156128, "reward_std": 0.19903381168842316, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 353 }, { "clip_ratio/high_max": 0.002701366043766029, "clip_ratio/high_mean": 0.0010250792693113908, "clip_ratio/low_mean": 0.0007919001000118442, "clip_ratio/low_min": 1.4249885680328589e-05, "clip_ratio/region_mean": 0.0018169793620472774, "epoch": 0.8258967629046369, "grad_norm": 0.19687455892562866, "learning_rate": 1e-06, "loss": -0.0538, "step": 354 }, { "clip_ratio/high_max": 0.0029620647910633124, "clip_ratio/high_mean": 0.0011355915703461505, "clip_ratio/low_mean": 0.0008949293005571235, "clip_ratio/low_min": 1.922485353134107e-05, "clip_ratio/region_mean": 0.0020305208381614648, "epoch": 0.8282298046077574, "grad_norm": 0.19460690021514893, "learning_rate": 1e-06, "loss": -0.054, "step": 355 }, { "clip_ratio/high_max": 0.0029802091885358095, "clip_ratio/high_mean": 0.0011279366408416536, "clip_ratio/low_mean": 0.0010404508739156881, "clip_ratio/low_min": 1.4249885680328589e-05, "clip_ratio/region_mean": 0.0021683875238522887, "epoch": 0.8305628463108778, "grad_norm": 0.16182830929756165, "learning_rate": 1e-06, "loss": -0.0541, "step": 356 }, { "clip_ratio/high_max": 0.0020283677586121485, "clip_ratio/high_mean": 0.0007928921604616335, "clip_ratio/low_mean": 0.0005745399403167539, "clip_ratio/low_min": 1.913070082082413e-05, "clip_ratio/region_mean": 0.001367432101687882, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 1009.3984985351562, "completions/mean_terminated_length": 694.2841186523438, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.8328958880139983, "grad_norm": 3.546156167984009, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 56388797.0, "reward": 0.5625, "reward_std": 0.16070914268493652, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 357 }, { "clip_ratio/high_max": 0.002098391647450626, "clip_ratio/high_mean": 0.000867415858010645, "clip_ratio/low_mean": 0.0006261759317567339, "clip_ratio/low_min": 9.565350410412066e-06, "clip_ratio/region_mean": 0.0014935918152332306, "epoch": 0.8352289297171187, "grad_norm": 0.1930905133485794, "learning_rate": 1e-06, "loss": -0.0314, "step": 358 }, { "clip_ratio/high_max": 0.002383436687523499, "clip_ratio/high_mean": 0.001010985173707013, "clip_ratio/low_mean": 0.0007677788817090914, "clip_ratio/low_min": 3.826140164164826e-05, "clip_ratio/region_mean": 0.001778764053597115, "epoch": 0.8375619714202391, "grad_norm": 0.17316696047782898, "learning_rate": 1e-06, "loss": -0.0317, "step": 359 }, { "clip_ratio/high_max": 0.0023589047777932137, "clip_ratio/high_mean": 0.0009500104733888293, "clip_ratio/low_mean": 0.0008985684526123805, "clip_ratio/low_min": 1.4437514437304344e-05, "clip_ratio/region_mean": 0.0018485789332771674, "epoch": 0.8398950131233596, "grad_norm": 0.16557767987251282, "learning_rate": 1e-06, "loss": -0.0317, "step": 360 }, { "clip_ratio/high_max": 0.001962165206350619, "clip_ratio/high_mean": 0.0007250951966852881, "clip_ratio/low_mean": 0.0006995256317168241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001424620822945144, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3309.0, "completions/mean_length": 1003.7801513671875, "completions/mean_terminated_length": 658.4950561523438, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.84222805482648, "grad_norm": 0.2210596650838852, "learning_rate": 1e-06, "loss": -0.038, "num_tokens": 57007504.0, "reward": 0.582589328289032, "reward_std": 0.15984968841075897, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 361 }, { "clip_ratio/high_max": 0.0022455156213254668, "clip_ratio/high_mean": 0.0008453439568256726, "clip_ratio/low_mean": 0.0009494814021309139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017948253444046713, "epoch": 0.8445610965296004, "grad_norm": 0.18021905422210693, "learning_rate": 1e-06, "loss": -0.0382, "step": 362 }, { "clip_ratio/high_max": 0.0024326696438947693, "clip_ratio/high_mean": 0.0009686993744253414, "clip_ratio/low_mean": 0.0009222776388924103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001890977036964614, "epoch": 0.8468941382327209, "grad_norm": 0.18369819223880768, "learning_rate": 1e-06, "loss": -0.0383, "step": 363 }, { "clip_ratio/high_max": 0.002655788470292464, "clip_ratio/high_mean": 0.0009370807820232585, "clip_ratio/low_mean": 0.0011387812683096854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002075862023048103, "epoch": 0.8492271799358414, "grad_norm": 0.15883184969425201, "learning_rate": 1e-06, "loss": -0.0384, "step": 364 }, { "clip_ratio/high_max": 0.0019755165158130694, "clip_ratio/high_mean": 0.000805605170171475, "clip_ratio/low_mean": 0.0004138782555855869, "clip_ratio/low_min": 1.236399566550972e-05, "clip_ratio/region_mean": 0.0012194834416732192, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3676.0, "completions/mean_length": 955.3092041015625, "completions/mean_terminated_length": 697.3779907226562, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.8515602216389618, "grad_norm": 0.1878119260072708, "learning_rate": 1e-06, "loss": -0.0404, "num_tokens": 57672013.0, "reward": 0.5993303656578064, "reward_std": 0.17374607920646667, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 365 }, { "clip_ratio/high_max": 0.001958521948836278, "clip_ratio/high_mean": 0.000903451857084292, "clip_ratio/low_mean": 0.0006609357660636306, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015643876104149967, "epoch": 0.8538932633420823, "grad_norm": 0.18378785252571106, "learning_rate": 1e-06, "loss": -0.0405, "step": 366 }, { "clip_ratio/high_max": 0.002294516692927573, "clip_ratio/high_mean": 0.0009600472039892338, "clip_ratio/low_mean": 0.0007042439301585546, "clip_ratio/low_min": 1.236399566550972e-05, "clip_ratio/region_mean": 0.0016642911359667778, "epoch": 0.8562263050452027, "grad_norm": 0.18215297162532806, "learning_rate": 1e-06, "loss": -0.0407, "step": 367 }, { "clip_ratio/high_max": 0.002158739051083103, "clip_ratio/high_mean": 0.0009613855054340092, "clip_ratio/low_mean": 0.0007916008680695086, "clip_ratio/low_min": 2.472799133101944e-05, "clip_ratio/region_mean": 0.001752986354404129, "epoch": 0.8585593467483231, "grad_norm": 0.1608356088399887, "learning_rate": 1e-06, "loss": -0.0407, "step": 368 }, { "clip_ratio/high_max": 0.0023672774113947526, "clip_ratio/high_mean": 0.0009400053750141524, "clip_ratio/low_mean": 0.0006342629667415167, "clip_ratio/low_min": 5.3236795793054625e-05, "clip_ratio/region_mean": 0.0015742683790449519, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3787.0, "completions/mean_length": 1102.20654296875, "completions/mean_terminated_length": 674.5216674804688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.8608923884514436, "grad_norm": 0.2447790503501892, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 58298374.0, "reward": 0.5491071939468384, "reward_std": 0.20760175585746765, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 369 }, { "clip_ratio/high_max": 0.002755924877419602, "clip_ratio/high_mean": 0.0010790827382152202, "clip_ratio/low_mean": 0.0008554469859518576, "clip_ratio/low_min": 5.2428846174734645e-05, "clip_ratio/region_mean": 0.0019345297332620248, "epoch": 0.863225430154564, "grad_norm": 0.18960827589035034, "learning_rate": 1e-06, "loss": -0.0602, "step": 370 }, { "clip_ratio/high_max": 0.0033292944863205776, "clip_ratio/high_mean": 0.001253096339496551, "clip_ratio/low_mean": 0.0008943536913648131, "clip_ratio/low_min": 1.2905224139103666e-05, "clip_ratio/region_mean": 0.002147450046322774, "epoch": 0.8655584718576844, "grad_norm": 0.1930917203426361, "learning_rate": 1e-06, "loss": -0.0603, "step": 371 }, { "clip_ratio/high_max": 0.002852627389074769, "clip_ratio/high_mean": 0.0011816074438684154, "clip_ratio/low_mean": 0.0011715912914951332, "clip_ratio/low_min": 1.3309198948263656e-05, "clip_ratio/region_mean": 0.002353198724449612, "epoch": 0.8678915135608049, "grad_norm": 0.15736685693264008, "learning_rate": 1e-06, "loss": -0.0604, "step": 372 }, { "clip_ratio/high_max": 0.0018621472918312065, "clip_ratio/high_mean": 0.0006013296006130986, "clip_ratio/low_mean": 0.0005976172142254654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011989467748207971, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3337.0, "completions/mean_length": 1004.700927734375, "completions/mean_terminated_length": 668.0247192382812, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.8702245552639254, "grad_norm": 0.24684472382068634, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 58914890.0, "reward": 0.5625, "reward_std": 0.1441766321659088, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 373 }, { "clip_ratio/high_max": 0.002301405394973699, "clip_ratio/high_mean": 0.0007432947477354901, "clip_ratio/low_mean": 0.0007097573652572464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014530521257256623, "epoch": 0.8725575969670458, "grad_norm": 0.17742793262004852, "learning_rate": 1e-06, "loss": -0.0047, "step": 374 }, { "clip_ratio/high_max": 0.002393427006609272, "clip_ratio/high_mean": 0.0008043886718951399, "clip_ratio/low_mean": 0.0007910615577202407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00159545021597296, "epoch": 0.8748906386701663, "grad_norm": 0.20483046770095825, "learning_rate": 1e-06, "loss": -0.0047, "step": 375 }, { "clip_ratio/high_max": 0.002305126210558228, "clip_ratio/high_mean": 0.0007676853474549716, "clip_ratio/low_mean": 0.0011159597052028403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018836450472008437, "epoch": 0.8772236803732867, "grad_norm": 0.15515586733818054, "learning_rate": 1e-06, "loss": -0.0049, "step": 376 }, { "clip_ratio/high_max": 0.0017282192784477957, "clip_ratio/high_mean": 0.0006549659337906633, "clip_ratio/low_mean": 0.0006054146683709405, "clip_ratio/low_min": 3.1037907319841906e-05, "clip_ratio/region_mean": 0.0012603805807884783, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 997.849365234375, "completions/mean_terminated_length": 706.5702514648438, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.8795567220764071, "grad_norm": 0.2025204300880432, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 59581067.0, "reward": 0.5479910969734192, "reward_std": 0.1603696048259735, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 377 }, { "clip_ratio/high_max": 0.0020229816727805883, "clip_ratio/high_mean": 0.000800444134256395, "clip_ratio/low_mean": 0.0007448088426826871, "clip_ratio/low_min": 2.4312759705935605e-05, "clip_ratio/region_mean": 0.0015452529769390821, "epoch": 0.8818897637795275, "grad_norm": 0.2929990589618683, "learning_rate": 1e-06, "loss": -0.0268, "step": 378 }, { "clip_ratio/high_max": 0.002186452205933165, "clip_ratio/high_mean": 0.0008051826143855578, "clip_ratio/low_mean": 0.0008068799816101091, "clip_ratio/low_min": 5.820270234835334e-05, "clip_ratio/region_mean": 0.001612062580534257, "epoch": 0.884222805482648, "grad_norm": 0.16836479306221008, "learning_rate": 1e-06, "loss": -0.027, "step": 379 }, { "clip_ratio/high_max": 0.002006549071666086, "clip_ratio/high_mean": 0.0008284708424071141, "clip_ratio/low_mean": 0.0010751064291980583, "clip_ratio/low_min": 5.672977204085328e-05, "clip_ratio/region_mean": 0.0019035772857023403, "epoch": 0.8865558471857684, "grad_norm": 0.15234163403511047, "learning_rate": 1e-06, "loss": -0.0271, "step": 380 }, { "clip_ratio/high_max": 0.0021033439043094404, "clip_ratio/high_mean": 0.0008251320577983279, "clip_ratio/low_mean": 0.000579738059968804, "clip_ratio/low_min": 1.0672813914425205e-05, "clip_ratio/region_mean": 0.0014048701523279306, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3281.0, "completions/mean_length": 1041.5614013671875, "completions/mean_terminated_length": 717.2630004882812, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 0.8888888888888888, "grad_norm": 0.22845418751239777, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 60245506.0, "reward": 0.5870535969734192, "reward_std": 0.193771630525589, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 381 }, { "clip_ratio/high_max": 0.002718445146456361, "clip_ratio/high_mean": 0.0011191814264748245, "clip_ratio/low_mean": 0.0007366808913502609, "clip_ratio/low_min": 3.058436504943529e-05, "clip_ratio/region_mean": 0.0018558623050921597, "epoch": 0.8912219305920094, "grad_norm": 0.2056245505809784, "learning_rate": 1e-06, "loss": -0.043, "step": 382 }, { "clip_ratio/high_max": 0.0028264226784813218, "clip_ratio/high_mean": 0.0010864172036235686, "clip_ratio/low_mean": 0.0009407932739122771, "clip_ratio/low_min": 1.1415525477787014e-05, "clip_ratio/region_mean": 0.002027210510277655, "epoch": 0.8935549722951298, "grad_norm": 0.18155162036418915, "learning_rate": 1e-06, "loss": -0.0431, "step": 383 }, { "clip_ratio/high_max": 0.002537812317314092, "clip_ratio/high_mean": 0.0010662215718184598, "clip_ratio/low_mean": 0.0010319757038814714, "clip_ratio/low_min": 1.1415525477787014e-05, "clip_ratio/region_mean": 0.002098197284794878, "epoch": 0.8958880139982502, "grad_norm": 0.16482894122600555, "learning_rate": 1e-06, "loss": -0.0433, "step": 384 }, { "clip_ratio/high_max": 0.0024245571403298527, "clip_ratio/high_mean": 0.0010505716418265365, "clip_ratio/low_mean": 0.0008309532749990467, "clip_ratio/low_min": 1.8553886548033915e-05, "clip_ratio/region_mean": 0.0018815248986356892, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2715.0, "completions/mean_length": 1030.72216796875, "completions/mean_terminated_length": 721.9348754882812, "completions/min_length": 207.0, "completions/min_terminated_length": 207.0, "epoch": 0.8982210557013707, "grad_norm": 0.23535828292369843, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 60924457.0, "reward": 0.5189732313156128, "reward_std": 0.2167702466249466, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 385 }, { "clip_ratio/high_max": 0.0029020343208685517, "clip_ratio/high_mean": 0.0012601596317836083, "clip_ratio/low_mean": 0.0010456260861246847, "clip_ratio/low_min": 5.8561335208651144e-05, "clip_ratio/region_mean": 0.0023057857324602082, "epoch": 0.9005540974044911, "grad_norm": 0.2908968925476074, "learning_rate": 1e-06, "loss": -0.0125, "step": 386 }, { "clip_ratio/high_max": 0.00297304404375609, "clip_ratio/high_mean": 0.001287070866965223, "clip_ratio/low_mean": 0.0012656381877604872, "clip_ratio/low_min": 7.592655310872942e-05, "clip_ratio/region_mean": 0.0025527090838295408, "epoch": 0.9028871391076115, "grad_norm": 0.20043176412582397, "learning_rate": 1e-06, "loss": -0.0127, "step": 387 }, { "clip_ratio/high_max": 0.0029717248253291473, "clip_ratio/high_mean": 0.0012672728298639413, "clip_ratio/low_mean": 0.001491492468630895, "clip_ratio/low_min": 0.00012025548221572535, "clip_ratio/region_mean": 0.002758765338512603, "epoch": 0.905220180810732, "grad_norm": 0.19485506415367126, "learning_rate": 1e-06, "loss": -0.0128, "step": 388 }, { "clip_ratio/high_max": 0.0019767155281442683, "clip_ratio/high_mean": 0.0008686156488693086, "clip_ratio/low_mean": 0.0006668734604318161, "clip_ratio/low_min": 3.042407024622662e-05, "clip_ratio/region_mean": 0.001535489114758093, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 1221.3695068359375, "completions/mean_terminated_length": 776.8388671875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.9075532225138524, "grad_norm": 0.31277042627334595, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 61613212.0, "reward": 0.4810267984867096, "reward_std": 0.20621488988399506, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991899728775024, "step": 389 }, { "clip_ratio/high_max": 0.002609794813906774, "clip_ratio/high_mean": 0.0010951280601148028, "clip_ratio/low_mean": 0.0007735913277429063, "clip_ratio/low_min": 4.9226028750126716e-05, "clip_ratio/region_mean": 0.0018687194024096243, "epoch": 0.9098862642169728, "grad_norm": 0.18411730229854584, "learning_rate": 1e-06, "loss": -0.058, "step": 390 }, { "clip_ratio/high_max": 0.0026955270004691556, "clip_ratio/high_mean": 0.001119035669034929, "clip_ratio/low_mean": 0.0009240042563760653, "clip_ratio/low_min": 7.008247666817624e-05, "clip_ratio/region_mean": 0.0020430399672477506, "epoch": 0.9122193059200934, "grad_norm": 0.1755540519952774, "learning_rate": 1e-06, "loss": -0.0581, "step": 391 }, { "clip_ratio/high_max": 0.002759647206403315, "clip_ratio/high_mean": 0.0010707116070989287, "clip_ratio/low_mean": 0.001155722657131264, "clip_ratio/low_min": 9.547603622195311e-05, "clip_ratio/region_mean": 0.0022264342405833304, "epoch": 0.9145523476232138, "grad_norm": 0.15837901830673218, "learning_rate": 1e-06, "loss": -0.0582, "step": 392 }, { "clip_ratio/high_max": 0.0022227497611311264, "clip_ratio/high_mean": 0.0009585925054125255, "clip_ratio/low_mean": 0.0006712236408930039, "clip_ratio/low_min": 6.128730183263542e-05, "clip_ratio/region_mean": 0.0016298161208396778, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 1159.6640625, "completions/mean_terminated_length": 727.2970581054688, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.9168853893263342, "grad_norm": 0.20776188373565674, "learning_rate": 1e-06, "loss": -0.0439, "num_tokens": 62280447.0, "reward": 0.5167410969734192, "reward_std": 0.1989564150571823, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 393 }, { "clip_ratio/high_max": 0.0023745885482640006, "clip_ratio/high_mean": 0.001091882189939497, "clip_ratio/low_mean": 0.0008926317477744306, "clip_ratio/low_min": 8.839757902023848e-05, "clip_ratio/region_mean": 0.001984513917705044, "epoch": 0.9192184310294547, "grad_norm": 0.216299369931221, "learning_rate": 1e-06, "loss": -0.044, "step": 394 }, { "clip_ratio/high_max": 0.0025957033649319783, "clip_ratio/high_mean": 0.0011341691060806625, "clip_ratio/low_mean": 0.0010196714338235324, "clip_ratio/low_min": 0.00013542869237426203, "clip_ratio/region_mean": 0.0021538405708270147, "epoch": 0.9215514727325751, "grad_norm": 0.17864838242530823, "learning_rate": 1e-06, "loss": -0.0443, "step": 395 }, { "clip_ratio/high_max": 0.0028884730709251016, "clip_ratio/high_mean": 0.0011756976273318287, "clip_ratio/low_mean": 0.0011388604943931568, "clip_ratio/low_min": 7.298564014490694e-05, "clip_ratio/region_mean": 0.002314558172656689, "epoch": 0.9238845144356955, "grad_norm": 0.18394559621810913, "learning_rate": 1e-06, "loss": -0.0443, "step": 396 }, { "clip_ratio/high_max": 0.0020353328618512023, "clip_ratio/high_mean": 0.0007871496127336286, "clip_ratio/low_mean": 0.0005249154464763706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013120650728524197, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 1195.01123046875, "completions/mean_terminated_length": 733.4049072265625, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.926217556138816, "grad_norm": 0.25119495391845703, "learning_rate": 1e-06, "loss": -0.056, "num_tokens": 62937633.0, "reward": 0.559151828289032, "reward_std": 0.16288875043392181, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 397 }, { "clip_ratio/high_max": 0.002287545292347204, "clip_ratio/high_mean": 0.0008342068613274023, "clip_ratio/low_mean": 0.0007482967857868061, "clip_ratio/low_min": 3.0637256713816896e-05, "clip_ratio/region_mean": 0.0015825036643946078, "epoch": 0.9285505978419364, "grad_norm": 0.19131989777088165, "learning_rate": 1e-06, "loss": -0.0562, "step": 398 }, { "clip_ratio/high_max": 0.0024469990748912096, "clip_ratio/high_mean": 0.0009432288206880912, "clip_ratio/low_mean": 0.0008438600398221752, "clip_ratio/low_min": 2.3755226720822975e-05, "clip_ratio/region_mean": 0.001787088876881171, "epoch": 0.9308836395450568, "grad_norm": 0.16210754215717316, "learning_rate": 1e-06, "loss": -0.0563, "step": 399 }, { "clip_ratio/high_max": 0.0024581043544458225, "clip_ratio/high_mean": 0.0009209176769218175, "clip_ratio/low_mean": 0.0009729203411552589, "clip_ratio/low_min": 3.580977863748558e-05, "clip_ratio/region_mean": 0.0018938379944302142, "epoch": 0.9332166812481774, "grad_norm": 0.2024601250886917, "learning_rate": 1e-06, "loss": -0.0564, "step": 400 }, { "clip_ratio/high_max": 0.0025209817540599033, "clip_ratio/high_mean": 0.0010594668910925975, "clip_ratio/low_mean": 0.00047258918903025915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015320560851250775, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 1154.279052734375, "completions/mean_terminated_length": 708.1054077148438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.9355497229512978, "grad_norm": 0.2300882786512375, "learning_rate": 1e-06, "loss": -0.0593, "num_tokens": 63574059.0, "reward": 0.5725446939468384, "reward_std": 0.1989564299583435, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 401 }, { "clip_ratio/high_max": 0.003005019068950787, "clip_ratio/high_mean": 0.0012672556804318447, "clip_ratio/low_mean": 0.0006670679576927796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019343237072462216, "epoch": 0.9378827646544182, "grad_norm": 0.1934894323348999, "learning_rate": 1e-06, "loss": -0.0595, "step": 402 }, { "clip_ratio/high_max": 0.002924209213233553, "clip_ratio/high_mean": 0.001274744514375925, "clip_ratio/low_mean": 0.000834423901324044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002109168446622789, "epoch": 0.9402158063575387, "grad_norm": 0.18593506515026093, "learning_rate": 1e-06, "loss": -0.0597, "step": 403 }, { "clip_ratio/high_max": 0.0032100193566293456, "clip_ratio/high_mean": 0.0013261890253488673, "clip_ratio/low_mean": 0.0009882827889668988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023144718215917237, "epoch": 0.9425488480606591, "grad_norm": 0.19097992777824402, "learning_rate": 1e-06, "loss": -0.0598, "step": 404 }, { "clip_ratio/high_max": 0.0019859337553498335, "clip_ratio/high_mean": 0.0007406607492157491, "clip_ratio/low_mean": 0.0007533457910540164, "clip_ratio/low_min": 5.200455962039996e-05, "clip_ratio/region_mean": 0.0014940065229893662, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 1077.921875, "completions/mean_terminated_length": 719.9725341796875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.9448818897637795, "grad_norm": 0.8377181887626648, "learning_rate": 1e-06, "loss": -0.0616, "num_tokens": 64250669.0, "reward": 0.543526828289032, "reward_std": 0.20737352967262268, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 405 }, { "clip_ratio/high_max": 0.0022347445265040733, "clip_ratio/high_mean": 0.0008671173181937775, "clip_ratio/low_mean": 0.0009420438291272148, "clip_ratio/low_min": 1.6571655578445643e-05, "clip_ratio/region_mean": 0.001809161141864024, "epoch": 0.9472149314669, "grad_norm": 0.24612660706043243, "learning_rate": 1e-06, "loss": -0.0617, "step": 406 }, { "clip_ratio/high_max": 0.0027180582510482054, "clip_ratio/high_mean": 0.0010046696952485945, "clip_ratio/low_mean": 0.0012112204567529261, "clip_ratio/low_min": 8.970411363407038e-05, "clip_ratio/region_mean": 0.0022158901701914147, "epoch": 0.9495479731700204, "grad_norm": 0.19877082109451294, "learning_rate": 1e-06, "loss": -0.062, "step": 407 }, { "clip_ratio/high_max": 0.002539428031013813, "clip_ratio/high_mean": 0.0009243737276847241, "clip_ratio/low_mean": 0.0013382744837144855, "clip_ratio/low_min": 7.317226481973194e-05, "clip_ratio/region_mean": 0.0022626482459600084, "epoch": 0.9518810148731408, "grad_norm": 0.1779702603816986, "learning_rate": 1e-06, "loss": -0.062, "step": 408 }, { "clip_ratio/high_max": 0.0018373513303231448, "clip_ratio/high_mean": 0.0007442679834639421, "clip_ratio/low_mean": 0.0005523934905795613, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012966614958713762, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3964.0, "completions/mean_length": 1261.8013916015625, "completions/mean_terminated_length": 767.7667236328125, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.9542140565762613, "grad_norm": 0.3581889271736145, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 64923939.0, "reward": 0.4810267984867096, "reward_std": 0.17250937223434448, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991896748542786, "step": 409 }, { "clip_ratio/high_max": 0.002253194361401256, "clip_ratio/high_mean": 0.000915467542654369, "clip_ratio/low_mean": 0.0007152687176130712, "clip_ratio/low_min": 1.1188685675733723e-05, "clip_ratio/region_mean": 0.001630736245715525, "epoch": 0.9565470982793818, "grad_norm": 0.18749943375587463, "learning_rate": 1e-06, "loss": -0.041, "step": 410 }, { "clip_ratio/high_max": 0.0019923243526136503, "clip_ratio/high_mean": 0.0008239964536187472, "clip_ratio/low_mean": 0.0007722066748101497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015962031320668757, "epoch": 0.9588801399825022, "grad_norm": 0.16970683634281158, "learning_rate": 1e-06, "loss": -0.0411, "step": 411 }, { "clip_ratio/high_max": 0.0024162078989320435, "clip_ratio/high_mean": 0.000988130228506634, "clip_ratio/low_mean": 0.0009604281567590078, "clip_ratio/low_min": 3.356605884619057e-05, "clip_ratio/region_mean": 0.0019485583397909068, "epoch": 0.9612131816856226, "grad_norm": 1351.603271484375, "learning_rate": 1e-06, "loss": 0.0538, "step": 412 }, { "clip_ratio/high_max": 0.0022055149493098725, "clip_ratio/high_mean": 0.0009358815241284901, "clip_ratio/low_mean": 0.0006885692337164073, "clip_ratio/low_min": 1.1273448762949556e-05, "clip_ratio/region_mean": 0.00162445073510753, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 1107.4342041015625, "completions/mean_terminated_length": 706.4367065429688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.9635462233887431, "grad_norm": 0.5329749584197998, "learning_rate": 1e-06, "loss": -0.0722, "num_tokens": 65574968.0, "reward": 0.5379464626312256, "reward_std": 0.21091002225875854, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 413 }, { "clip_ratio/high_max": 0.002682222479052143, "clip_ratio/high_mean": 0.0011359051113686292, "clip_ratio/low_mean": 0.0008874069883404445, "clip_ratio/low_min": 2.9952076147310436e-05, "clip_ratio/region_mean": 0.002023312117671594, "epoch": 0.9658792650918635, "grad_norm": 0.23891282081604004, "learning_rate": 1e-06, "loss": -0.0726, "step": 414 }, { "clip_ratio/high_max": 0.002633215823152568, "clip_ratio/high_mean": 0.0011325615814712364, "clip_ratio/low_mean": 0.001091472344342037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022240339530981146, "epoch": 0.9682123067949839, "grad_norm": 0.21334873139858246, "learning_rate": 1e-06, "loss": -0.0728, "step": 415 }, { "clip_ratio/high_max": 0.0027172141017217655, "clip_ratio/high_mean": 0.0011938353763980558, "clip_ratio/low_mean": 0.001277945382753387, "clip_ratio/low_min": 1.1273448762949556e-05, "clip_ratio/region_mean": 0.0024717807245906442, "epoch": 0.9705453484981044, "grad_norm": 0.19429868459701538, "learning_rate": 1e-06, "loss": -0.0729, "step": 416 }, { "clip_ratio/high_max": 0.0025263316529162694, "clip_ratio/high_mean": 0.0009484534257353516, "clip_ratio/low_mean": 0.0006053798088032636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015538332518190145, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 1024.0535888671875, "completions/mean_terminated_length": 651.1138916015625, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.9728783902012248, "grad_norm": 47.30574417114258, "learning_rate": 1e-06, "loss": -0.0474, "num_tokens": 66183104.0, "reward": 0.5446428656578064, "reward_std": 0.1943764090538025, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 417 }, { "clip_ratio/high_max": 0.0027300925212330185, "clip_ratio/high_mean": 0.0009911583838402294, "clip_ratio/low_mean": 0.000642533374048071, "clip_ratio/low_min": 1.2235708709340543e-05, "clip_ratio/region_mean": 0.0016336917324224487, "epoch": 0.9752114319043453, "grad_norm": 0.27597111463546753, "learning_rate": 1e-06, "loss": -0.0586, "step": 418 }, { "clip_ratio/high_max": 0.0028693240819848143, "clip_ratio/high_mean": 0.0011185532221134054, "clip_ratio/low_mean": 0.0008494097673974466, "clip_ratio/low_min": 2.4471417418681085e-05, "clip_ratio/region_mean": 0.001967963075003354, "epoch": 0.9775444736074658, "grad_norm": 0.24335071444511414, "learning_rate": 1e-06, "loss": -0.0589, "step": 419 }, { "clip_ratio/high_max": 0.0036550228542182595, "clip_ratio/high_mean": 0.0013008051428187173, "clip_ratio/low_mean": 0.001135048745709355, "clip_ratio/low_min": 2.4471417418681085e-05, "clip_ratio/region_mean": 0.0024358538721571676, "epoch": 0.9798775153105862, "grad_norm": 127.47596740722656, "learning_rate": 1e-06, "loss": -0.0504, "step": 420 }, { "clip_ratio/high_max": 0.0017617841876926832, "clip_ratio/high_mean": 0.0007148251279431861, "clip_ratio/low_mean": 0.0007903993482614169, "clip_ratio/low_min": 5.005026650906075e-05, "clip_ratio/region_mean": 0.0015052244416438043, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 1086.7445068359375, "completions/mean_terminated_length": 700.1649780273438, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.9822105570137066, "grad_norm": 0.24364274740219116, "learning_rate": 1e-06, "loss": -0.0503, "num_tokens": 66821227.0, "reward": 0.5546875, "reward_std": 0.18272952735424042, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 421 }, { "clip_ratio/high_max": 0.002380145473580342, "clip_ratio/high_mean": 0.0009163302838715026, "clip_ratio/low_mean": 0.0009065432532224804, "clip_ratio/low_min": 7.691667815379333e-05, "clip_ratio/region_mean": 0.0018228735716547817, "epoch": 0.9845435987168271, "grad_norm": 0.2174522578716278, "learning_rate": 1e-06, "loss": -0.0505, "step": 422 }, { "clip_ratio/high_max": 0.002334899800189305, "clip_ratio/high_mean": 0.000951287106545351, "clip_ratio/low_mean": 0.0010699750164349098, "clip_ratio/low_min": 8.663720291224308e-05, "clip_ratio/region_mean": 0.0020212621602695435, "epoch": 0.9868766404199475, "grad_norm": 0.1690983921289444, "learning_rate": 1e-06, "loss": -0.0506, "step": 423 }, { "clip_ratio/high_max": 0.002185624703997746, "clip_ratio/high_mean": 0.0008677510741108563, "clip_ratio/low_mean": 0.0012600752525031567, "clip_ratio/low_min": 7.493461089325137e-05, "clip_ratio/region_mean": 0.0021278263011481613, "epoch": 0.9892096821230679, "grad_norm": 0.23601838946342468, "learning_rate": 1e-06, "loss": -0.0507, "step": 424 }, { "clip_ratio/high_max": 0.0022686846714350395, "clip_ratio/high_mean": 0.00090493409516057, "clip_ratio/low_mean": 0.000638129553408362, "clip_ratio/low_min": 3.0685138881381135e-05, "clip_ratio/region_mean": 0.001543063630379038, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 1167.107177734375, "completions/mean_terminated_length": 647.5269165039062, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.9915427238261884, "grad_norm": 0.23493416607379913, "learning_rate": 1e-06, "loss": -0.0629, "num_tokens": 67410499.0, "reward": 0.5368303656578064, "reward_std": 0.1792358011007309, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 425 }, { "clip_ratio/high_max": 0.0027551185485208407, "clip_ratio/high_mean": 0.0010716854994825553, "clip_ratio/low_mean": 0.0008940097304730443, "clip_ratio/low_min": 6.581964589713607e-05, "clip_ratio/region_mean": 0.0019656952790683135, "epoch": 0.9938757655293088, "grad_norm": 0.21279995143413544, "learning_rate": 1e-06, "loss": -0.0632, "step": 426 }, { "clip_ratio/high_max": 0.002672596645425074, "clip_ratio/high_mean": 0.0010569844453129917, "clip_ratio/low_mean": 0.0010575091819191584, "clip_ratio/low_min": 0.00011861113853228744, "clip_ratio/region_mean": 0.0021144935963093303, "epoch": 0.9962088072324293, "grad_norm": 0.1970187872648239, "learning_rate": 1e-06, "loss": -0.0633, "step": 427 }, { "clip_ratio/high_max": 0.0029313290724530816, "clip_ratio/high_mean": 0.001172023570688907, "clip_ratio/low_mean": 0.0012444996391423047, "clip_ratio/low_min": 0.0001430197535228217, "clip_ratio/region_mean": 0.0024165231734514236, "epoch": 0.9985418489355498, "grad_norm": 0.18373264372348785, "learning_rate": 1e-06, "loss": -0.0634, "step": 428 }, { "clip_ratio/high_max": 0.002497360503184609, "clip_ratio/high_mean": 0.0010106641693710117, "clip_ratio/low_mean": 0.0008426159311056836, "clip_ratio/low_min": 2.4718212443985976e-05, "clip_ratio/region_mean": 0.0018532800932007376, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3232.0, "completions/mean_length": 1041.1160888671875, "completions/mean_terminated_length": 683.0623779296875, "completions/min_length": 207.0, "completions/min_terminated_length": 207.0, "epoch": 1.0023330417031204, "grad_norm": 0.5928906202316284, "learning_rate": 1e-06, "loss": -0.0565, "num_tokens": 68048739.0, "reward": 0.6071428656578064, "reward_std": 0.21665966510772705, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 429 }, { "clip_ratio/high_max": 0.0031892494298517704, "clip_ratio/high_mean": 0.001262467852939153, "clip_ratio/low_mean": 0.0009467488143855007, "clip_ratio/low_min": 1.0694730008253828e-05, "clip_ratio/region_mean": 0.0022092167346272618, "epoch": 1.0046660834062409, "grad_norm": 0.2119440734386444, "learning_rate": 1e-06, "loss": -0.0568, "step": 430 }, { "clip_ratio/high_max": 0.0033243998550460674, "clip_ratio/high_mean": 0.0012677034792432096, "clip_ratio/low_mean": 0.0011940883669012692, "clip_ratio/low_min": 2.852578654710669e-05, "clip_ratio/region_mean": 0.00246179175155703, "epoch": 1.0069991251093613, "grad_norm": 0.23959796130657196, "learning_rate": 1e-06, "loss": -0.0569, "step": 431 }, { "clip_ratio/high_max": 0.0034948404136230238, "clip_ratio/high_mean": 0.001382044485580991, "clip_ratio/low_mean": 0.001366867854812881, "clip_ratio/low_min": 3.7077319575473666e-05, "clip_ratio/region_mean": 0.002748912331298925, "epoch": 1.0093321668124817, "grad_norm": 0.3138231635093689, "learning_rate": 1e-06, "loss": -0.057, "step": 432 }, { "clip_ratio/high_max": 0.0022635277855442837, "clip_ratio/high_mean": 0.0009683400385256391, "clip_ratio/low_mean": 0.0006374351250997279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001605775127245579, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3415.0, "completions/mean_length": 1172.515625, "completions/mean_terminated_length": 680.8213500976562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 1.0116652085156022, "grad_norm": 0.26457658410072327, "learning_rate": 1e-06, "loss": -0.0874, "num_tokens": 68667377.0, "reward": 0.5613839626312256, "reward_std": 0.19238194823265076, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 433 }, { "clip_ratio/high_max": 0.002553269347117748, "clip_ratio/high_mean": 0.0010748338700068416, "clip_ratio/low_mean": 0.0008445240473520244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019193579501006752, "epoch": 1.0139982502187226, "grad_norm": 0.21261554956436157, "learning_rate": 1e-06, "loss": -0.0876, "step": 434 }, { "clip_ratio/high_max": 0.002998109306645347, "clip_ratio/high_mean": 0.0011644164078461472, "clip_ratio/low_mean": 0.0008899917593225837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002054408148978837, "epoch": 1.016331291921843, "grad_norm": 0.18102584779262543, "learning_rate": 1e-06, "loss": -0.0878, "step": 435 }, { "clip_ratio/high_max": 0.0026607739491737448, "clip_ratio/high_mean": 0.0011617568125075195, "clip_ratio/low_mean": 0.001095112253096886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022568690983462147, "epoch": 1.0186643336249634, "grad_norm": 0.17100775241851807, "learning_rate": 1e-06, "loss": -0.0879, "step": 436 }, { "clip_ratio/high_max": 0.0022402321992558427, "clip_ratio/high_mean": 0.0010755455223261379, "clip_ratio/low_mean": 0.0005675919264831464, "clip_ratio/low_min": 3.897717670042766e-05, "clip_ratio/region_mean": 0.0016431374606327154, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 1253.989990234375, "completions/mean_terminated_length": 700.7453002929688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.020997375328084, "grad_norm": 0.2603064477443695, "learning_rate": 1e-06, "loss": -0.0585, "num_tokens": 69285936.0, "reward": 0.527901828289032, "reward_std": 0.19505222141742706, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 437 }, { "clip_ratio/high_max": 0.002369270281633362, "clip_ratio/high_mean": 0.0011190426885150373, "clip_ratio/low_mean": 0.000735484581127821, "clip_ratio/low_min": 2.7120308914163616e-05, "clip_ratio/region_mean": 0.001854527225077618, "epoch": 1.0233304170312045, "grad_norm": 0.20056387782096863, "learning_rate": 1e-06, "loss": -0.0588, "step": 438 }, { "clip_ratio/high_max": 0.002552493824623525, "clip_ratio/high_mean": 0.0011975535308010876, "clip_ratio/low_mean": 0.0008490620848533581, "clip_ratio/low_min": 2.996643706865143e-05, "clip_ratio/region_mean": 0.0020466156129259616, "epoch": 1.025663458734325, "grad_norm": 0.24322101473808289, "learning_rate": 1e-06, "loss": -0.0589, "step": 439 }, { "clip_ratio/high_max": 0.0027461221689009108, "clip_ratio/high_mean": 0.0011773703627113719, "clip_ratio/low_mean": 0.000981432478511124, "clip_ratio/low_min": 4.199771865387447e-05, "clip_ratio/region_mean": 0.0021588028175756335, "epoch": 1.0279965004374454, "grad_norm": 0.2547197937965393, "learning_rate": 1e-06, "loss": -0.059, "step": 440 }, { "clip_ratio/high_max": 0.002163559831387829, "clip_ratio/high_mean": 0.0007196046262833988, "clip_ratio/low_mean": 0.0005012099636587664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001220814599946607, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3795.0, "completions/mean_length": 1232.48779296875, "completions/mean_terminated_length": 702.2076416015625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.0303295421405658, "grad_norm": 0.2406734973192215, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 69921181.0, "reward": 0.5178571939468384, "reward_std": 0.1515759527683258, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 441 }, { "clip_ratio/high_max": 0.0031208639265969396, "clip_ratio/high_mean": 0.0009395222896273481, "clip_ratio/low_mean": 0.0006050529918866232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015445752615050878, "epoch": 1.0326625838436863, "grad_norm": 0.34198862314224243, "learning_rate": 1e-06, "loss": -0.0429, "step": 442 }, { "clip_ratio/high_max": 0.0028876431606477126, "clip_ratio/high_mean": 0.0010055548355012434, "clip_ratio/low_mean": 0.0007337094093600172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017392642330378294, "epoch": 1.0349956255468067, "grad_norm": 0.2106492668390274, "learning_rate": 1e-06, "loss": -0.0432, "step": 443 }, { "clip_ratio/high_max": 0.002413209236692637, "clip_ratio/high_mean": 0.0008520501432940364, "clip_ratio/low_mean": 0.0008652512897242559, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017173014130094089, "epoch": 1.0373286672499271, "grad_norm": 0.16405101120471954, "learning_rate": 1e-06, "loss": -0.0432, "step": 444 }, { "clip_ratio/high_max": 0.0016530153297935612, "clip_ratio/high_mean": 0.0007504325149056967, "clip_ratio/low_mean": 0.0005937825026194332, "clip_ratio/low_min": 1.5660236385883763e-05, "clip_ratio/region_mean": 0.0013442150375340134, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3147.0, "completions/mean_length": 1083.50341796875, "completions/mean_terminated_length": 670.6231079101562, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.0396617089530475, "grad_norm": 0.2528078556060791, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 70544208.0, "reward": 0.5993303656578064, "reward_std": 0.17186744511127472, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 445 }, { "clip_ratio/high_max": 0.002137284151103813, "clip_ratio/high_mean": 0.0008763888454268454, "clip_ratio/low_mean": 0.0006821734368713805, "clip_ratio/low_min": 1.2687779417319689e-05, "clip_ratio/region_mean": 0.0015585622750222683, "epoch": 1.041994750656168, "grad_norm": 0.21493834257125854, "learning_rate": 1e-06, "loss": -0.0432, "step": 446 }, { "clip_ratio/high_max": 0.0023032074168440886, "clip_ratio/high_mean": 0.0008935389614634914, "clip_ratio/low_mean": 0.0008167138876160607, "clip_ratio/low_min": 3.806333916145377e-05, "clip_ratio/region_mean": 0.0017102528290706687, "epoch": 1.0443277923592884, "grad_norm": 0.16642822325229645, "learning_rate": 1e-06, "loss": -0.0434, "step": 447 }, { "clip_ratio/high_max": 0.0021593002675217576, "clip_ratio/high_mean": 0.0009112338157137856, "clip_ratio/low_mean": 0.00088441349362256, "clip_ratio/low_min": 4.698070551967248e-05, "clip_ratio/region_mean": 0.0017956472947844304, "epoch": 1.0466608340624088, "grad_norm": 0.16769467294216156, "learning_rate": 1e-06, "loss": -0.0434, "step": 448 }, { "clip_ratio/high_max": 0.002010406751651317, "clip_ratio/high_mean": 0.000719288325853995, "clip_ratio/low_mean": 0.0006601368022529641, "clip_ratio/low_min": 1.1032656402676366e-05, "clip_ratio/region_mean": 0.0013794251281069592, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3775.0, "completions/mean_length": 1208.61279296875, "completions/mean_terminated_length": 696.3955078125, "completions/min_length": 210.0, "completions/min_terminated_length": 210.0, "epoch": 1.0489938757655293, "grad_norm": 0.2198099046945572, "learning_rate": 1e-06, "loss": -0.0745, "num_tokens": 71164797.0, "reward": 0.5569196939468384, "reward_std": 0.16755138337612152, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 449 }, { "clip_ratio/high_max": 0.002501886283425847, "clip_ratio/high_mean": 0.0009829985538090114, "clip_ratio/low_mean": 0.0008431503956671804, "clip_ratio/low_min": 2.7382257030694745e-05, "clip_ratio/region_mean": 0.0018261489567521494, "epoch": 1.0513269174686497, "grad_norm": 0.2172139286994934, "learning_rate": 1e-06, "loss": -0.0747, "step": 450 }, { "clip_ratio/high_max": 0.0027076808401034214, "clip_ratio/high_mean": 0.0009232839001924731, "clip_ratio/low_mean": 0.0009586244605088723, "clip_ratio/low_min": 4.4130625610705465e-05, "clip_ratio/region_mean": 0.0018819083088601474, "epoch": 1.0536599591717701, "grad_norm": 0.1754942536354065, "learning_rate": 1e-06, "loss": -0.0748, "step": 451 }, { "clip_ratio/high_max": 0.0023701952704868745, "clip_ratio/high_mean": 0.0008908379331842298, "clip_ratio/low_mean": 0.0011827458984043915, "clip_ratio/low_min": 3.30979710270185e-05, "clip_ratio/region_mean": 0.0020735837970278226, "epoch": 1.0559930008748906, "grad_norm": 0.16789038479328156, "learning_rate": 1e-06, "loss": -0.0749, "step": 452 }, { "clip_ratio/high_max": 0.0025380053630215116, "clip_ratio/high_mean": 0.000866484235302778, "clip_ratio/low_mean": 0.0005177533403184498, "clip_ratio/low_min": 3.7185887777013704e-05, "clip_ratio/region_mean": 0.0013842375847161748, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 1297.6328125, "completions/mean_terminated_length": 689.2921142578125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 1.058326042578011, "grad_norm": 0.2575956881046295, "learning_rate": 1e-06, "loss": -0.0544, "num_tokens": 71783564.0, "reward": 0.4843750298023224, "reward_std": 0.17055727541446686, "rewards/verify_math_reward/mean": 0.484375, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 453 }, { "clip_ratio/high_max": 0.003247183994972147, "clip_ratio/high_mean": 0.001136523893364938, "clip_ratio/low_mean": 0.0007282457563633216, "clip_ratio/low_min": 1.2176114978501573e-05, "clip_ratio/region_mean": 0.0018647696888365317, "epoch": 1.0606590842811314, "grad_norm": 0.20209437608718872, "learning_rate": 1e-06, "loss": -0.0546, "step": 454 }, { "clip_ratio/high_max": 0.003685488656628877, "clip_ratio/high_mean": 0.001236621774296509, "clip_ratio/low_mean": 0.0008430483003394329, "clip_ratio/low_min": 4.909283234155737e-05, "clip_ratio/region_mean": 0.0020796700919163413, "epoch": 1.0629921259842519, "grad_norm": 0.1865995079278946, "learning_rate": 1e-06, "loss": -0.0548, "step": 455 }, { "clip_ratio/high_max": 0.003276666277088225, "clip_ratio/high_mean": 0.0011205308182979934, "clip_ratio/low_mean": 0.0009250333259842591, "clip_ratio/low_min": 4.8502250137971714e-05, "clip_ratio/region_mean": 0.0020455641570151784, "epoch": 1.0653251676873725, "grad_norm": 0.18742579221725464, "learning_rate": 1e-06, "loss": -0.0548, "step": 456 }, { "clip_ratio/high_max": 0.0022046166996005923, "clip_ratio/high_mean": 0.0008413064897467848, "clip_ratio/low_mean": 0.0005927354313826072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014340419438667595, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 1144.6741943359375, "completions/mean_terminated_length": 661.7298583984375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 1.067658209390493, "grad_norm": 0.25028684735298157, "learning_rate": 1e-06, "loss": -0.0535, "num_tokens": 72381008.0, "reward": 0.5725446939468384, "reward_std": 0.17107580602169037, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 457 }, { "clip_ratio/high_max": 0.0026352396598667838, "clip_ratio/high_mean": 0.0010579805675661191, "clip_ratio/low_mean": 0.000667972421979357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017259529704460874, "epoch": 1.0699912510936134, "grad_norm": 0.26422417163848877, "learning_rate": 1e-06, "loss": -0.0536, "step": 458 }, { "clip_ratio/high_max": 0.002492845287633827, "clip_ratio/high_mean": 0.001014596256936784, "clip_ratio/low_mean": 0.000821308330159809, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018359045934630558, "epoch": 1.0723242927967338, "grad_norm": 0.17433074116706848, "learning_rate": 1e-06, "loss": -0.0537, "step": 459 }, { "clip_ratio/high_max": 0.0025103687730734237, "clip_ratio/high_mean": 0.001026009729685029, "clip_ratio/low_mean": 0.0010625150316627696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002088524808641523, "epoch": 1.0746573344998542, "grad_norm": 0.20946235954761505, "learning_rate": 1e-06, "loss": -0.0538, "step": 460 }, { "clip_ratio/high_max": 0.002273993319249712, "clip_ratio/high_mean": 0.0009915009231917793, "clip_ratio/low_mean": 0.0006304622429524898, "clip_ratio/low_min": 5.316049646353349e-05, "clip_ratio/region_mean": 0.0016219631652347744, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 1157.474365234375, "completions/mean_terminated_length": 672.1781616210938, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.0769903762029747, "grad_norm": 0.2810959815979004, "learning_rate": 1e-06, "loss": -0.0758, "num_tokens": 72985753.0, "reward": 0.6238839626312256, "reward_std": 0.18648287653923035, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 461 }, { "clip_ratio/high_max": 0.002901716965425294, "clip_ratio/high_mean": 0.001183644104457926, "clip_ratio/low_mean": 0.000750970037188381, "clip_ratio/low_min": 3.065189775952604e-05, "clip_ratio/region_mean": 0.0019346141489222646, "epoch": 1.079323417906095, "grad_norm": 0.42501500248908997, "learning_rate": 1e-06, "loss": -0.0759, "step": 462 }, { "clip_ratio/high_max": 0.0029904071634518914, "clip_ratio/high_mean": 0.001165322115411982, "clip_ratio/low_mean": 0.0008729879136808449, "clip_ratio/low_min": 2.6499894374865107e-05, "clip_ratio/region_mean": 0.0020383100418257527, "epoch": 1.0816564596092155, "grad_norm": 0.21801304817199707, "learning_rate": 1e-06, "loss": -0.0761, "step": 463 }, { "clip_ratio/high_max": 0.003126786665234249, "clip_ratio/high_mean": 0.0012181888523628004, "clip_ratio/low_mean": 0.0009937214454112109, "clip_ratio/low_min": 5.9281984249537345e-05, "clip_ratio/region_mean": 0.00221191030141199, "epoch": 1.083989501312336, "grad_norm": 0.18408824503421783, "learning_rate": 1e-06, "loss": -0.0763, "step": 464 }, { "clip_ratio/high_max": 0.0022430877506849356, "clip_ratio/high_mean": 0.0009697070963738952, "clip_ratio/low_mean": 0.0006000940375088248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015698011120548472, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3847.0, "completions/mean_length": 1048.6429443359375, "completions/mean_terminated_length": 648.48486328125, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 1.0863225430154564, "grad_norm": 0.3102148771286011, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 73584473.0, "reward": 0.5625, "reward_std": 0.1794205754995346, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 465 }, { "clip_ratio/high_max": 0.0029570161132141948, "clip_ratio/high_mean": 0.0011562057115952484, "clip_ratio/low_mean": 0.0008806782461761031, "clip_ratio/low_min": 1.597648224560544e-05, "clip_ratio/region_mean": 0.0020368839323055, "epoch": 1.0886555847185768, "grad_norm": 0.21077482402324677, "learning_rate": 1e-06, "loss": -0.0416, "step": 466 }, { "clip_ratio/high_max": 0.0029258788490551524, "clip_ratio/high_mean": 0.0012092641954950523, "clip_ratio/low_mean": 0.0008873563128872775, "clip_ratio/low_min": 1.683274967945181e-05, "clip_ratio/region_mean": 0.002096620519296266, "epoch": 1.0909886264216972, "grad_norm": 0.195527121424675, "learning_rate": 1e-06, "loss": -0.0417, "step": 467 }, { "clip_ratio/high_max": 0.0028943870929651894, "clip_ratio/high_mean": 0.0012042640955769457, "clip_ratio/low_mean": 0.0011328622058499604, "clip_ratio/low_min": 1.1968594662903342e-05, "clip_ratio/region_mean": 0.0023371263159788214, "epoch": 1.0933216681248177, "grad_norm": 0.19724786281585693, "learning_rate": 1e-06, "loss": -0.0418, "step": 468 }, { "clip_ratio/high_max": 0.00233584710440482, "clip_ratio/high_mean": 0.0009262848052458139, "clip_ratio/low_mean": 0.0005049029941801564, "clip_ratio/low_min": 1.3652250345330685e-05, "clip_ratio/region_mean": 0.001431187800335465, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 1110.7310791015625, "completions/mean_terminated_length": 671.1587524414062, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 1.0956547098279381, "grad_norm": 0.2988039255142212, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 74183000.0, "reward": 0.625, "reward_std": 0.17938736081123352, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 469 }, { "clip_ratio/high_max": 0.002319809434993658, "clip_ratio/high_mean": 0.0010313342172594275, "clip_ratio/low_mean": 0.0006836294878667104, "clip_ratio/low_min": 1.3652250345330685e-05, "clip_ratio/region_mean": 0.0017149637133115903, "epoch": 1.0979877515310585, "grad_norm": 0.2110224813222885, "learning_rate": 1e-06, "loss": -0.0611, "step": 470 }, { "clip_ratio/high_max": 0.002344794207601808, "clip_ratio/high_mean": 0.0010146239837922622, "clip_ratio/low_mean": 0.0008716858128536842, "clip_ratio/low_min": 2.730450069066137e-05, "clip_ratio/region_mean": 0.0018863098011934198, "epoch": 1.100320793234179, "grad_norm": 0.21967659890651703, "learning_rate": 1e-06, "loss": -0.0612, "step": 471 }, { "clip_ratio/high_max": 0.002489184531441424, "clip_ratio/high_mean": 0.0010012208003900014, "clip_ratio/low_mean": 0.0009560151975165354, "clip_ratio/low_min": 2.730450069066137e-05, "clip_ratio/region_mean": 0.0019572360106394626, "epoch": 1.1026538349372994, "grad_norm": 0.2066105753183365, "learning_rate": 1e-06, "loss": -0.0613, "step": 472 }, { "clip_ratio/high_max": 0.00252743066084804, "clip_ratio/high_mean": 0.0011350118038535584, "clip_ratio/low_mean": 0.0006097807581681991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017447925638407469, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4005.0, "completions/mean_length": 1018.5781860351562, "completions/mean_terminated_length": 644.9737548828125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 1.10498687664042, "grad_norm": 0.4039445221424103, "learning_rate": 1e-06, "loss": -0.0568, "num_tokens": 74771958.0, "reward": 0.5870535969734192, "reward_std": 0.19910939037799835, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 473 }, { "clip_ratio/high_max": 0.0026595941744744778, "clip_ratio/high_mean": 0.001192385985632427, "clip_ratio/low_mean": 0.0008023298432817683, "clip_ratio/low_min": 4.411020017869305e-05, "clip_ratio/region_mean": 0.0019947158289141953, "epoch": 1.1073199183435405, "grad_norm": 0.2738463580608368, "learning_rate": 1e-06, "loss": -0.0571, "step": 474 }, { "clip_ratio/high_max": 0.0027832462365040556, "clip_ratio/high_mean": 0.0012730055022984743, "clip_ratio/low_mean": 0.0010500557673367439, "clip_ratio/low_min": 1.7043905245373026e-05, "clip_ratio/region_mean": 0.0023230613151099533, "epoch": 1.109652960046661, "grad_norm": 0.20586569607257843, "learning_rate": 1e-06, "loss": -0.0573, "step": 475 }, { "clip_ratio/high_max": 0.002503683470422402, "clip_ratio/high_mean": 0.0011811897020379547, "clip_ratio/low_mean": 0.0012160812730144244, "clip_ratio/low_min": 7.040552191028837e-05, "clip_ratio/region_mean": 0.0023972709241206758, "epoch": 1.1119860017497813, "grad_norm": 0.18886679410934448, "learning_rate": 1e-06, "loss": -0.0574, "step": 476 }, { "clip_ratio/high_max": 0.0021368232410168275, "clip_ratio/high_mean": 0.0008080906281975331, "clip_ratio/low_mean": 0.0006324650885289884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014405557049030904, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 1204.5435791015625, "completions/mean_terminated_length": 713.8263549804688, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 1.1143190434529018, "grad_norm": 0.259557843208313, "learning_rate": 1e-06, "loss": -0.0483, "num_tokens": 75415389.0, "reward": 0.5546875, "reward_std": 0.17461301386356354, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 477 }, { "clip_ratio/high_max": 0.0025909241594490595, "clip_ratio/high_mean": 0.0009477423245698446, "clip_ratio/low_mean": 0.000759488850235357, "clip_ratio/low_min": 2.4006145395105705e-05, "clip_ratio/region_mean": 0.0017072311820811592, "epoch": 1.1166520851560222, "grad_norm": 0.24066336452960968, "learning_rate": 1e-06, "loss": -0.0488, "step": 478 }, { "clip_ratio/high_max": 0.002895123907364905, "clip_ratio/high_mean": 0.0010247687441733433, "clip_ratio/low_mean": 0.0009534886503388407, "clip_ratio/low_min": 2.0997817046009004e-05, "clip_ratio/region_mean": 0.001978257394512184, "epoch": 1.1189851268591426, "grad_norm": 0.2084594964981079, "learning_rate": 1e-06, "loss": -0.0489, "step": 479 }, { "clip_ratio/high_max": 0.0030624620194430463, "clip_ratio/high_mean": 0.0010817544480232755, "clip_ratio/low_mean": 0.0010616067229420878, "clip_ratio/low_min": 3.600921991164796e-05, "clip_ratio/region_mean": 0.002143361169146374, "epoch": 1.121318168562263, "grad_norm": 0.18420426547527313, "learning_rate": 1e-06, "loss": -0.049, "step": 480 }, { "clip_ratio/high_max": 0.002763999182207044, "clip_ratio/high_mean": 0.0009472612764511723, "clip_ratio/low_mean": 0.0005508842104973155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014981455096858554, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 1244.794677734375, "completions/mean_terminated_length": 680.6524047851562, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.1236512102653835, "grad_norm": 0.2825188636779785, "learning_rate": 1e-06, "loss": -0.0476, "num_tokens": 76021717.0, "reward": 0.4933035969734192, "reward_std": 0.18652454018592834, "rewards/verify_math_reward/mean": 0.4933035671710968, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 481 }, { "clip_ratio/high_max": 0.003124406262941193, "clip_ratio/high_mean": 0.001103212394809816, "clip_ratio/low_mean": 0.0007747773897790466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001877989801869262, "epoch": 1.125984251968504, "grad_norm": 0.20673763751983643, "learning_rate": 1e-06, "loss": -0.0478, "step": 482 }, { "clip_ratio/high_max": 0.003279129487054888, "clip_ratio/high_mean": 0.0011481096553325187, "clip_ratio/low_mean": 0.0009372758395329583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002085385531245265, "epoch": 1.1283172936716244, "grad_norm": 0.18566632270812988, "learning_rate": 1e-06, "loss": -0.0481, "step": 483 }, { "clip_ratio/high_max": 0.0031690060313849244, "clip_ratio/high_mean": 0.0011772643392760074, "clip_ratio/low_mean": 0.001112159683543723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002289424002810847, "epoch": 1.1306503353747448, "grad_norm": 0.22507146000862122, "learning_rate": 1e-06, "loss": -0.0481, "step": 484 }, { "clip_ratio/high_max": 0.0023185262543847784, "clip_ratio/high_mean": 0.001118690150178736, "clip_ratio/low_mean": 0.0006218609123607166, "clip_ratio/low_min": 6.16264278505696e-05, "clip_ratio/region_mean": 0.001740551073453389, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3335.0, "completions/mean_length": 1082.216552734375, "completions/mean_terminated_length": 664.8055419921875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 1.1329833770778652, "grad_norm": 0.263689249753952, "learning_rate": 1e-06, "loss": -0.0726, "num_tokens": 76623487.0, "reward": 0.6194196939468384, "reward_std": 0.20711392164230347, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 485 }, { "clip_ratio/high_max": 0.002900182775192661, "clip_ratio/high_mean": 0.001376448115479434, "clip_ratio/low_mean": 0.0008805330962786684, "clip_ratio/low_min": 3.847503649012651e-05, "clip_ratio/region_mean": 0.0022569812135770917, "epoch": 1.1353164187809857, "grad_norm": 0.2990931272506714, "learning_rate": 1e-06, "loss": -0.0728, "step": 486 }, { "clip_ratio/high_max": 0.0032894488103920594, "clip_ratio/high_mean": 0.0014484953280771151, "clip_ratio/low_mean": 0.001043111295075505, "clip_ratio/low_min": 6.978736382734496e-05, "clip_ratio/region_mean": 0.0024916065667639486, "epoch": 1.137649460484106, "grad_norm": 0.22996681928634644, "learning_rate": 1e-06, "loss": -0.073, "step": 487 }, { "clip_ratio/high_max": 0.0028651291722781025, "clip_ratio/high_mean": 0.0013519937674573157, "clip_ratio/low_mean": 0.0011777003783208784, "clip_ratio/low_min": 9.049874097399879e-05, "clip_ratio/region_mean": 0.0025296941530541517, "epoch": 1.1399825021872265, "grad_norm": 0.2150421142578125, "learning_rate": 1e-06, "loss": -0.0731, "step": 488 }, { "clip_ratio/high_max": 0.002586822542070877, "clip_ratio/high_mean": 0.0009138073055510176, "clip_ratio/low_mean": 0.000544145361345727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001457952705095522, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2755.0, "completions/mean_length": 1221.638427734375, "completions/mean_terminated_length": 680.31298828125, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 1.142315543890347, "grad_norm": 0.25136440992355347, "learning_rate": 1e-06, "loss": -0.0724, "num_tokens": 77226531.0, "reward": 0.5803571939468384, "reward_std": 0.1724012941122055, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 489 }, { "clip_ratio/high_max": 0.0026457873682375066, "clip_ratio/high_mean": 0.0010887640182772884, "clip_ratio/low_mean": 0.0007683179364903481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018570819811429828, "epoch": 1.1446485855934676, "grad_norm": 0.20619578659534454, "learning_rate": 1e-06, "loss": -0.0726, "step": 490 }, { "clip_ratio/high_max": 0.00322708765452262, "clip_ratio/high_mean": 0.0011754703409678768, "clip_ratio/low_mean": 0.0009519013810859178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021273717211443, "epoch": 1.1469816272965878, "grad_norm": 0.18403246998786926, "learning_rate": 1e-06, "loss": -0.0727, "step": 491 }, { "clip_ratio/high_max": 0.003217021978343837, "clip_ratio/high_mean": 0.0010864435134863015, "clip_ratio/low_mean": 0.000938193590627634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020246370586392004, "epoch": 1.1493146689997085, "grad_norm": 0.1858193278312683, "learning_rate": 1e-06, "loss": -0.0728, "step": 492 }, { "clip_ratio/high_max": 0.0025213944536517374, "clip_ratio/high_mean": 0.001026797828671988, "clip_ratio/low_mean": 0.0005906803880861844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016174782140296884, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 1160.571533203125, "completions/mean_terminated_length": 675.7867431640625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 1.151647710702829, "grad_norm": 0.31809571385383606, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 77835723.0, "reward": 0.6149553656578064, "reward_std": 0.1844548135995865, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 493 }, { "clip_ratio/high_max": 0.0032942732359515503, "clip_ratio/high_mean": 0.0012894079809484538, "clip_ratio/low_mean": 0.0008035559458221542, "clip_ratio/low_min": 3.74925002688542e-05, "clip_ratio/region_mean": 0.00209296395041747, "epoch": 1.1539807524059493, "grad_norm": 0.24302072823047638, "learning_rate": 1e-06, "loss": -0.0663, "step": 494 }, { "clip_ratio/high_max": 0.0032003248561522923, "clip_ratio/high_mean": 0.0012832400789193343, "clip_ratio/low_mean": 0.000918714134968468, "clip_ratio/low_min": 1.2497500392782968e-05, "clip_ratio/region_mean": 0.002201954208430834, "epoch": 1.1563137941090698, "grad_norm": 0.22756662964820862, "learning_rate": 1e-06, "loss": -0.0664, "step": 495 }, { "clip_ratio/high_max": 0.003222346247639507, "clip_ratio/high_mean": 0.0012496600757003762, "clip_ratio/low_mean": 0.0011643295838439371, "clip_ratio/low_min": 3.794778240262531e-05, "clip_ratio/region_mean": 0.0024139896195265464, "epoch": 1.1586468358121902, "grad_norm": 0.3573664724826813, "learning_rate": 1e-06, "loss": -0.0665, "step": 496 }, { "clip_ratio/high_max": 0.0019172481770510785, "clip_ratio/high_mean": 0.00074302591929154, "clip_ratio/low_mean": 0.0005632942318243295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013063201477052644, "completions/clipped_ratio": 0.1930803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 1355.040283203125, "completions/mean_terminated_length": 699.1812133789062, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.1609798775153106, "grad_norm": 1.8756970167160034, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 78431575.0, "reward": 0.4676339626312256, "reward_std": 0.15312504768371582, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 497 }, { "clip_ratio/high_max": 0.002415666742308531, "clip_ratio/high_mean": 0.0008594937062298413, "clip_ratio/low_mean": 0.0005832253505104745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014427190653805155, "epoch": 1.163312919218431, "grad_norm": 0.2252153605222702, "learning_rate": 1e-06, "loss": -0.065, "step": 498 }, { "clip_ratio/high_max": 0.0028128914564149454, "clip_ratio/high_mean": 0.0009922890767484205, "clip_ratio/low_mean": 0.0008376346095246845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018299237162864301, "epoch": 1.1656459609215515, "grad_norm": 0.17894788086414337, "learning_rate": 1e-06, "loss": -0.0653, "step": 499 }, { "clip_ratio/high_max": 0.0026993598185072187, "clip_ratio/high_mean": 0.0009669624469097471, "clip_ratio/low_mean": 0.0008481379973090952, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018151004333049059, "epoch": 1.167979002624672, "grad_norm": 0.2169492393732071, "learning_rate": 1e-06, "loss": -0.0653, "step": 500 }, { "clip_ratio/high_max": 0.002118979173246771, "clip_ratio/high_mean": 0.0007394006988761248, "clip_ratio/low_mean": 0.000609270304266829, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001348671030427795, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 1115.4140625, "completions/mean_terminated_length": 711.2027587890625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.1703120443277923, "grad_norm": 0.24433636665344238, "learning_rate": 1e-06, "loss": -0.0318, "num_tokens": 79081194.0, "reward": 0.5725446939468384, "reward_std": 0.1630384922027588, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 501 }, { "clip_ratio/high_max": 0.002178889226343017, "clip_ratio/high_mean": 0.0007912378241599072, "clip_ratio/low_mean": 0.0007846136977605056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015758515146444552, "epoch": 1.1726450860309128, "grad_norm": 0.27686813473701477, "learning_rate": 1e-06, "loss": -0.0319, "step": 502 }, { "clip_ratio/high_max": 0.0022813988507550675, "clip_ratio/high_mean": 0.0008438164859398967, "clip_ratio/low_mean": 0.0009332925765193067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017771090642781928, "epoch": 1.1749781277340332, "grad_norm": 0.17822560667991638, "learning_rate": 1e-06, "loss": -0.0321, "step": 503 }, { "clip_ratio/high_max": 0.0022411858662962914, "clip_ratio/high_mean": 0.0007863507325964747, "clip_ratio/low_mean": 0.001046385004883632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001832735724747181, "epoch": 1.1773111694371536, "grad_norm": 0.6559427380561829, "learning_rate": 1e-06, "loss": -0.0321, "step": 504 }, { "clip_ratio/high_max": 0.002535623381845653, "clip_ratio/high_mean": 0.0008737004445720231, "clip_ratio/low_mean": 0.0006000905159453396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014737909732502885, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3476.0, "completions/mean_length": 1164.3851318359375, "completions/mean_terminated_length": 648.8516845703125, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 1.179644211140274, "grad_norm": 0.2354104369878769, "learning_rate": 1e-06, "loss": -0.0348, "num_tokens": 79658219.0, "reward": 0.606026828289032, "reward_std": 0.16308307647705078, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 505 }, { "clip_ratio/high_max": 0.0029321670372155495, "clip_ratio/high_mean": 0.0010944233181362506, "clip_ratio/low_mean": 0.0007464502914444893, "clip_ratio/low_min": 2.5778510462259874e-05, "clip_ratio/region_mean": 0.001840873621404171, "epoch": 1.1819772528433945, "grad_norm": 0.42677730321884155, "learning_rate": 1e-06, "loss": -0.035, "step": 506 }, { "clip_ratio/high_max": 0.0031047666707308963, "clip_ratio/high_mean": 0.001095973617339041, "clip_ratio/low_mean": 0.0009727769902383443, "clip_ratio/low_min": 1.2889255231129937e-05, "clip_ratio/region_mean": 0.0020687506039394066, "epoch": 1.184310294546515, "grad_norm": 0.18769286572933197, "learning_rate": 1e-06, "loss": -0.0351, "step": 507 }, { "clip_ratio/high_max": 0.0032864989043446258, "clip_ratio/high_mean": 0.001183237247460056, "clip_ratio/low_mean": 0.0010790460401040036, "clip_ratio/low_min": 2.8875028874608688e-05, "clip_ratio/region_mean": 0.002262283262098208, "epoch": 1.1866433362496354, "grad_norm": 0.19627745449543, "learning_rate": 1e-06, "loss": -0.0352, "step": 508 }, { "clip_ratio/high_max": 0.0026446826523169875, "clip_ratio/high_mean": 0.0011490188117022626, "clip_ratio/low_mean": 0.0005343578122847248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016833766276249662, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3132.0, "completions/mean_length": 1111.8984375, "completions/mean_terminated_length": 694.2760620117188, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.188976377952756, "grad_norm": 0.27344250679016113, "learning_rate": 1e-06, "loss": -0.0641, "num_tokens": 80295840.0, "reward": 0.5613839626312256, "reward_std": 0.2094450742006302, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 509 }, { "clip_ratio/high_max": 0.002799105452140793, "clip_ratio/high_mean": 0.0012503498728619888, "clip_ratio/low_mean": 0.0006864040014988859, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019367538698134013, "epoch": 1.1913094196558764, "grad_norm": 0.24334704875946045, "learning_rate": 1e-06, "loss": -0.0643, "step": 510 }, { "clip_ratio/high_max": 0.0032409555642516352, "clip_ratio/high_mean": 0.0013953156667412259, "clip_ratio/low_mean": 0.0009375580593768973, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002332873671548441, "epoch": 1.1936424613589969, "grad_norm": 0.20601294934749603, "learning_rate": 1e-06, "loss": -0.0646, "step": 511 }, { "clip_ratio/high_max": 0.003297197137726471, "clip_ratio/high_mean": 0.0014238302755984478, "clip_ratio/low_mean": 0.0010984683976857923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025222986441804096, "epoch": 1.1959755030621173, "grad_norm": 0.2185668647289276, "learning_rate": 1e-06, "loss": -0.0646, "step": 512 }, { "clip_ratio/high_max": 0.003037714857782703, "clip_ratio/high_mean": 0.0009864802850643173, "clip_ratio/low_mean": 0.0005879371342416562, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015744174088467844, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 1190.5179443359375, "completions/mean_terminated_length": 652.465576171875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.1983085447652377, "grad_norm": 0.40251195430755615, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 80882192.0, "reward": 0.5379464626312256, "reward_std": 0.17191162705421448, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 513 }, { "clip_ratio/high_max": 0.00348700176255079, "clip_ratio/high_mean": 0.0011312556052871514, "clip_ratio/low_mean": 0.0009178361360682175, "clip_ratio/low_min": 5.277274340187432e-05, "clip_ratio/region_mean": 0.002049091759545263, "epoch": 1.2006415864683582, "grad_norm": 0.3140200078487396, "learning_rate": 1e-06, "loss": -0.0519, "step": 514 }, { "clip_ratio/high_max": 0.003404418697755318, "clip_ratio/high_mean": 0.0011388463208277244, "clip_ratio/low_mean": 0.0010400396158729563, "clip_ratio/low_min": 6.0718910390278324e-05, "clip_ratio/region_mean": 0.002178885908506345, "epoch": 1.2029746281714786, "grad_norm": 0.22932732105255127, "learning_rate": 1e-06, "loss": -0.052, "step": 515 }, { "clip_ratio/high_max": 0.0034314867298235185, "clip_ratio/high_mean": 0.0011634763486654265, "clip_ratio/low_mean": 0.0012518906064542534, "clip_ratio/low_min": 8.931949196266942e-05, "clip_ratio/region_mean": 0.0024153669000952505, "epoch": 1.205307669874599, "grad_norm": 0.20296232402324677, "learning_rate": 1e-06, "loss": -0.0521, "step": 516 }, { "clip_ratio/high_max": 0.0024405604708590545, "clip_ratio/high_mean": 0.0010224582420050865, "clip_ratio/low_mean": 0.000716106515028514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017385647879564203, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 1228.0670166015625, "completions/mean_terminated_length": 714.85791015625, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 1.2076407115777195, "grad_norm": 0.3901233673095703, "learning_rate": 1e-06, "loss": -0.0669, "num_tokens": 81518628.0, "reward": 0.5111607313156128, "reward_std": 0.21583227813243866, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 517 }, { "clip_ratio/high_max": 0.002746026453678496, "clip_ratio/high_mean": 0.0012023100352962501, "clip_ratio/low_mean": 0.0009844303167483304, "clip_ratio/low_min": 1.5356265066657215e-05, "clip_ratio/region_mean": 0.0021867403120268136, "epoch": 1.20997375328084, "grad_norm": 0.2223348319530487, "learning_rate": 1e-06, "loss": -0.0672, "step": 518 }, { "clip_ratio/high_max": 0.0027386314686737023, "clip_ratio/high_mean": 0.0012098563529434614, "clip_ratio/low_mean": 0.0011314599723846186, "clip_ratio/low_min": 1.5356265066657215e-05, "clip_ratio/region_mean": 0.0023413162780343555, "epoch": 1.2123067949839603, "grad_norm": 1.712908387184143, "learning_rate": 1e-06, "loss": -0.0672, "step": 519 }, { "clip_ratio/high_max": 0.002672797709237784, "clip_ratio/high_mean": 0.0011744501935027074, "clip_ratio/low_mean": 0.0012705940171144903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024450441778753884, "epoch": 1.2146398366870808, "grad_norm": 0.23629812896251678, "learning_rate": 1e-06, "loss": -0.0532, "step": 520 }, { "clip_ratio/high_max": 0.0024207590831792913, "clip_ratio/high_mean": 0.0009819833776418818, "clip_ratio/low_mean": 0.00044106705627200427, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014230504057195503, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 1059.607177734375, "completions/mean_terminated_length": 630.25732421875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 1.2169728783902012, "grad_norm": 0.27234286069869995, "learning_rate": 1e-06, "loss": -0.0738, "num_tokens": 82098532.0, "reward": 0.6071428656578064, "reward_std": 0.16676045954227448, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 521 }, { "clip_ratio/high_max": 0.002897356724133715, "clip_ratio/high_mean": 0.001130295218899846, "clip_ratio/low_mean": 0.0006172338680698886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017475290878792293, "epoch": 1.2193059200933216, "grad_norm": 0.22588258981704712, "learning_rate": 1e-06, "loss": -0.0739, "step": 522 }, { "clip_ratio/high_max": 0.0028699926551780663, "clip_ratio/high_mean": 0.001169050250609871, "clip_ratio/low_mean": 0.0007686665858273045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019377168282517232, "epoch": 1.221638961796442, "grad_norm": 0.20201407372951508, "learning_rate": 1e-06, "loss": -0.0741, "step": 523 }, { "clip_ratio/high_max": 0.0027396416116971523, "clip_ratio/high_mean": 0.0011099947041657288, "clip_ratio/low_mean": 0.0008731172983971192, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001983111971640028, "epoch": 1.2239720034995625, "grad_norm": 0.19742225110530853, "learning_rate": 1e-06, "loss": -0.0742, "step": 524 }, { "clip_ratio/high_max": 0.002289624680997804, "clip_ratio/high_mean": 0.0009703290834295331, "clip_ratio/low_mean": 0.00040320868265553145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013735377397097182, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 1162.51904296875, "completions/mean_terminated_length": 623.87451171875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.226305045202683, "grad_norm": 0.26374271512031555, "learning_rate": 1e-06, "loss": -0.0744, "num_tokens": 82656301.0, "reward": 0.5848214626312256, "reward_std": 0.15534067153930664, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 525 }, { "clip_ratio/high_max": 0.002694363225600682, "clip_ratio/high_mean": 0.0011102521639259066, "clip_ratio/low_mean": 0.0005661382783728186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016763904823164921, "epoch": 1.2286380869058036, "grad_norm": 0.20919963717460632, "learning_rate": 1e-06, "loss": -0.0746, "step": 526 }, { "clip_ratio/high_max": 0.003078229638049379, "clip_ratio/high_mean": 0.0012590037076734006, "clip_ratio/low_mean": 0.0006440094502977445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019030131297768094, "epoch": 1.2309711286089238, "grad_norm": 0.201873779296875, "learning_rate": 1e-06, "loss": -0.0748, "step": 527 }, { "clip_ratio/high_max": 0.0029209858184913173, "clip_ratio/high_mean": 0.0011904968596354593, "clip_ratio/low_mean": 0.0007235945558932144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019140914446325041, "epoch": 1.2333041703120444, "grad_norm": 0.20124907791614532, "learning_rate": 1e-06, "loss": -0.0748, "step": 528 }, { "clip_ratio/high_max": 0.0022860956378281116, "clip_ratio/high_mean": 0.0008535612159903394, "clip_ratio/low_mean": 0.0004766606289194897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013302218394528609, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3432.0, "completions/mean_length": 1132.89404296875, "completions/mean_terminated_length": 625.4862670898438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 1.2356372120151649, "grad_norm": 0.2638510763645172, "learning_rate": 1e-06, "loss": -0.0515, "num_tokens": 83225670.0, "reward": 0.543526828289032, "reward_std": 0.1471058428287506, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 529 }, { "clip_ratio/high_max": 0.002682746373466216, "clip_ratio/high_mean": 0.0009184516202367377, "clip_ratio/low_mean": 0.0005321112294041086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001450562893296592, "epoch": 1.2379702537182853, "grad_norm": 0.502601146697998, "learning_rate": 1e-06, "loss": -0.0515, "step": 530 }, { "clip_ratio/high_max": 0.0027897974723600782, "clip_ratio/high_mean": 0.0010453931881784229, "clip_ratio/low_mean": 0.000832491497931187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018778846497298218, "epoch": 1.2403032954214057, "grad_norm": 0.18539969623088837, "learning_rate": 1e-06, "loss": -0.0518, "step": 531 }, { "clip_ratio/high_max": 0.0028924906509928405, "clip_ratio/high_mean": 0.0009563182502461132, "clip_ratio/low_mean": 0.0009202150467899628, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018765333297778852, "epoch": 1.2426363371245261, "grad_norm": 0.19006116688251495, "learning_rate": 1e-06, "loss": -0.0519, "step": 532 }, { "clip_ratio/high_max": 0.002146041631931439, "clip_ratio/high_mean": 0.0009605318155081477, "clip_ratio/low_mean": 0.0007193819401436485, "clip_ratio/low_min": 1.6684463844285347e-05, "clip_ratio/region_mean": 0.0016799137301859446, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 1086.0770263671875, "completions/mean_terminated_length": 647.290283203125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 1.2449693788276466, "grad_norm": 0.32840490341186523, "learning_rate": 1e-06, "loss": -0.0678, "num_tokens": 83813115.0, "reward": 0.6071428656578064, "reward_std": 0.20436903834342957, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 533 }, { "clip_ratio/high_max": 0.002912695570557844, "clip_ratio/high_mean": 0.001161986499937484, "clip_ratio/low_mean": 0.001010933567158645, "clip_ratio/low_min": 3.3958163839997724e-05, "clip_ratio/region_mean": 0.002172920030716341, "epoch": 1.247302420530767, "grad_norm": 0.2582855224609375, "learning_rate": 1e-06, "loss": -0.0682, "step": 534 }, { "clip_ratio/high_max": 0.0029511729298974387, "clip_ratio/high_mean": 0.0012113627963117324, "clip_ratio/low_mean": 0.001218211564264493, "clip_ratio/low_min": 0.00010018913781095762, "clip_ratio/region_mean": 0.0024295743933180347, "epoch": 1.2496354622338874, "grad_norm": 0.23892898857593536, "learning_rate": 1e-06, "loss": -0.0683, "step": 535 }, { "clip_ratio/high_max": 0.0026392058498458937, "clip_ratio/high_mean": 0.0011167974698764738, "clip_ratio/low_mean": 0.0013427577978291083, "clip_ratio/low_min": 4.8603488721710164e-05, "clip_ratio/region_mean": 0.0024595552458777092, "epoch": 1.2519685039370079, "grad_norm": 0.23843353986740112, "learning_rate": 1e-06, "loss": -0.0684, "step": 536 }, { "clip_ratio/high_max": 0.0032015571268857457, "clip_ratio/high_mean": 0.0013849372480763122, "clip_ratio/low_mean": 0.0006303016987203591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020152389697614126, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3665.0, "completions/mean_length": 1050.094970703125, "completions/mean_terminated_length": 606.0626831054688, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.2543015456401283, "grad_norm": 0.3010174632072449, "learning_rate": 1e-06, "loss": -0.0897, "num_tokens": 84371072.0, "reward": 0.6205357313156128, "reward_std": 0.212863951921463, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 537 }, { "clip_ratio/high_max": 0.003555250455974601, "clip_ratio/high_mean": 0.0016362760070478544, "clip_ratio/low_mean": 0.0008980442271422362, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002534320250560995, "epoch": 1.2566345873432487, "grad_norm": 0.335114449262619, "learning_rate": 1e-06, "loss": -0.0896, "step": 538 }, { "clip_ratio/high_max": 0.0032455954351462424, "clip_ratio/high_mean": 0.00156883606541669, "clip_ratio/low_mean": 0.0010994934837071924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026683295000111684, "epoch": 1.2589676290463692, "grad_norm": 0.23157766461372375, "learning_rate": 1e-06, "loss": -0.0902, "step": 539 }, { "clip_ratio/high_max": 0.0030814962156000547, "clip_ratio/high_mean": 0.001454107648896752, "clip_ratio/low_mean": 0.0013161107672203798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027702184233930893, "epoch": 1.2613006707494896, "grad_norm": 0.2324124574661255, "learning_rate": 1e-06, "loss": -0.0902, "step": 540 }, { "clip_ratio/high_max": 0.0024609910033177584, "clip_ratio/high_mean": 0.0009634637299313908, "clip_ratio/low_mean": 0.0006899148211232387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00165337854923564, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 1119.9632568359375, "completions/mean_terminated_length": 655.3174438476562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.26363371245261, "grad_norm": 0.2652825713157654, "learning_rate": 1e-06, "loss": -0.0606, "num_tokens": 84970543.0, "reward": 0.5234375, "reward_std": 0.19581179320812225, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 541 }, { "clip_ratio/high_max": 0.002860011620214209, "clip_ratio/high_mean": 0.0011692748048517387, "clip_ratio/low_mean": 0.0009046125069289701, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020738873427035287, "epoch": 1.2659667541557305, "grad_norm": 0.24552273750305176, "learning_rate": 1e-06, "loss": -0.0608, "step": 542 }, { "clip_ratio/high_max": 0.0029651758304680698, "clip_ratio/high_mean": 0.001130644555814797, "clip_ratio/low_mean": 0.001115142342314357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002245786861749366, "epoch": 1.268299795858851, "grad_norm": 0.2158423513174057, "learning_rate": 1e-06, "loss": -0.0611, "step": 543 }, { "clip_ratio/high_max": 0.0028027433581883088, "clip_ratio/high_mean": 0.0011288594323559664, "clip_ratio/low_mean": 0.0012330880854278803, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023619475250598043, "epoch": 1.2706328375619713, "grad_norm": 0.2166370153427124, "learning_rate": 1e-06, "loss": -0.0611, "step": 544 }, { "clip_ratio/high_max": 0.00243698730628239, "clip_ratio/high_mean": 0.0012496500712586567, "clip_ratio/low_mean": 0.0005229877924648463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017726378791849129, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3354.0, "completions/mean_length": 1139.4163818359375, "completions/mean_terminated_length": 677.8077392578125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 1.272965879265092, "grad_norm": 0.30373507738113403, "learning_rate": 1e-06, "loss": -0.0641, "num_tokens": 85582068.0, "reward": 0.5725446939468384, "reward_std": 0.21256154775619507, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 545 }, { "clip_ratio/high_max": 0.002860502376279328, "clip_ratio/high_mean": 0.001370000249153236, "clip_ratio/low_mean": 0.000659561122120067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002029561386734713, "epoch": 1.2752989209682122, "grad_norm": 0.2634854018688202, "learning_rate": 1e-06, "loss": -0.0642, "step": 546 }, { "clip_ratio/high_max": 0.0030549829389201477, "clip_ratio/high_mean": 0.001465926892706193, "clip_ratio/low_mean": 0.0008710539696039632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002336980825930368, "epoch": 1.2776319626713328, "grad_norm": 0.21409092843532562, "learning_rate": 1e-06, "loss": -0.0645, "step": 547 }, { "clip_ratio/high_max": 0.0032525419592275284, "clip_ratio/high_mean": 0.0014321925336844288, "clip_ratio/low_mean": 0.0010119540747837164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024441465866402723, "epoch": 1.2799650043744533, "grad_norm": 0.26473334431648254, "learning_rate": 1e-06, "loss": -0.0645, "step": 548 }, { "clip_ratio/high_max": 0.002774135435174685, "clip_ratio/high_mean": 0.0009801686646824237, "clip_ratio/low_mean": 0.0006248924437386449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016050610938691534, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3289.0, "completions/mean_length": 1204.4765625, "completions/mean_terminated_length": 636.9813232421875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 1.2822980460775737, "grad_norm": 0.2573978304862976, "learning_rate": 1e-06, "loss": -0.0541, "num_tokens": 86151479.0, "reward": 0.5424107313156128, "reward_std": 0.1694946438074112, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 549 }, { "clip_ratio/high_max": 0.0028881392936455086, "clip_ratio/high_mean": 0.0010782591307361145, "clip_ratio/low_mean": 0.0008248954045484425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019031545598409139, "epoch": 1.2846310877806941, "grad_norm": 0.21310731768608093, "learning_rate": 1e-06, "loss": -0.0543, "step": 550 }, { "clip_ratio/high_max": 0.0031636353087378666, "clip_ratio/high_mean": 0.0011803750021499582, "clip_ratio/low_mean": 0.0010167299133172492, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021971050009597093, "epoch": 1.2869641294838146, "grad_norm": 0.2066744863986969, "learning_rate": 1e-06, "loss": -0.0545, "step": 551 }, { "clip_ratio/high_max": 0.0031374781756312586, "clip_ratio/high_mean": 0.0010388634782430017, "clip_ratio/low_mean": 0.0011535085795912892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021923720778431743, "epoch": 1.289297171186935, "grad_norm": 0.19866877794265747, "learning_rate": 1e-06, "loss": -0.0545, "step": 552 }, { "clip_ratio/high_max": 0.0025398693614988588, "clip_ratio/high_mean": 0.0009373759967274964, "clip_ratio/low_mean": 0.0008561032118450385, "clip_ratio/low_min": 8.585442265029997e-05, "clip_ratio/region_mean": 0.001793479241314344, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3080.0, "completions/mean_length": 1267.1685791015625, "completions/mean_terminated_length": 711.9773559570312, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 1.2916302128900554, "grad_norm": 0.2963334023952484, "learning_rate": 1e-06, "loss": -0.0581, "num_tokens": 86780534.0, "reward": 0.455357164144516, "reward_std": 0.21586939692497253, "rewards/verify_math_reward/mean": 0.4553571343421936, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 553 }, { "clip_ratio/high_max": 0.0031679862877354026, "clip_ratio/high_mean": 0.001169782848592149, "clip_ratio/low_mean": 0.0010967388516291976, "clip_ratio/low_min": 5.686277563654585e-05, "clip_ratio/region_mean": 0.002266521711135283, "epoch": 1.2939632545931758, "grad_norm": 0.2768391966819763, "learning_rate": 1e-06, "loss": -0.0583, "step": 554 }, { "clip_ratio/high_max": 0.002995738384925062, "clip_ratio/high_mean": 0.0011520195203047479, "clip_ratio/low_mean": 0.0013117311154928757, "clip_ratio/low_min": 0.00013993741413287353, "clip_ratio/region_mean": 0.0024637506139697507, "epoch": 1.2962962962962963, "grad_norm": 0.23656485974788666, "learning_rate": 1e-06, "loss": -0.0585, "step": 555 }, { "clip_ratio/high_max": 0.0031099224979698192, "clip_ratio/high_mean": 0.0011650168416963425, "clip_ratio/low_mean": 0.0014415876466955524, "clip_ratio/low_min": 0.00012875029824499507, "clip_ratio/region_mean": 0.00260660450294381, "epoch": 1.2986293379994167, "grad_norm": 0.7241779565811157, "learning_rate": 1e-06, "loss": -0.0585, "step": 556 }, { "clip_ratio/high_max": 0.0025796180252655176, "clip_ratio/high_mean": 0.000897256073585595, "clip_ratio/low_mean": 0.0006327187184069771, "clip_ratio/low_min": 1.550099295855034e-05, "clip_ratio/region_mean": 0.001529974795630551, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 1233.6741943359375, "completions/mean_terminated_length": 699.1205444335938, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 1.3009623797025371, "grad_norm": 0.22989463806152344, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 87401426.0, "reward": 0.4888392984867096, "reward_std": 0.15980760753154755, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 557 }, { "clip_ratio/high_max": 0.00261121618677862, "clip_ratio/high_mean": 0.001003954194857215, "clip_ratio/low_mean": 0.0007804395818311605, "clip_ratio/low_min": 3.7686499126721174e-05, "clip_ratio/region_mean": 0.0017843937730503967, "epoch": 1.3032954214056576, "grad_norm": 0.39446935057640076, "learning_rate": 1e-06, "loss": -0.0573, "step": 558 }, { "clip_ratio/high_max": 0.002796941033011535, "clip_ratio/high_mean": 0.0010352858107580687, "clip_ratio/low_mean": 0.0008649121973576257, "clip_ratio/low_min": 2.199155460402835e-05, "clip_ratio/region_mean": 0.001900197999930242, "epoch": 1.305628463108778, "grad_norm": 0.19332773983478546, "learning_rate": 1e-06, "loss": -0.0575, "step": 559 }, { "clip_ratio/high_max": 0.0026085253321070923, "clip_ratio/high_mean": 0.001023645859277167, "clip_ratio/low_mean": 0.0010228519458905794, "clip_ratio/low_min": 5.475709622260183e-05, "clip_ratio/region_mean": 0.0020464977824303787, "epoch": 1.3079615048118984, "grad_norm": 0.19031274318695068, "learning_rate": 1e-06, "loss": -0.0575, "step": 560 }, { "clip_ratio/high_max": 0.002163977282179985, "clip_ratio/high_mean": 0.0009806452289922163, "clip_ratio/low_mean": 0.0005480942168105685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001528739416244207, "completions/clipped_ratio": 0.1830357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 1329.305908203125, "completions/mean_terminated_length": 709.4453125, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 1.3102945465150189, "grad_norm": 9.381816864013672, "learning_rate": 1e-06, "loss": -0.0771, "num_tokens": 88014668.0, "reward": 0.4743303656578064, "reward_std": 0.18550466001033783, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 561 }, { "clip_ratio/high_max": 0.002293708334036637, "clip_ratio/high_mean": 0.0009090552757697878, "clip_ratio/low_mean": 0.0005726905969822838, "clip_ratio/low_min": 1.6391293684137054e-05, "clip_ratio/region_mean": 0.0014817458504694514, "epoch": 1.3126275882181395, "grad_norm": 0.293111115694046, "learning_rate": 1e-06, "loss": -0.0775, "step": 562 }, { "clip_ratio/high_max": 0.002479050548572559, "clip_ratio/high_mean": 0.0010085553767567035, "clip_ratio/low_mean": 0.00064487201780139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016534274109289981, "epoch": 1.3149606299212597, "grad_norm": 0.3953162729740143, "learning_rate": 1e-06, "loss": -0.0776, "step": 563 }, { "clip_ratio/high_max": 0.0029186286628828384, "clip_ratio/high_mean": 0.0011614363247645088, "clip_ratio/low_mean": 0.0008803043228908791, "clip_ratio/low_min": 1.6391293684137054e-05, "clip_ratio/region_mean": 0.0020417406340129673, "epoch": 1.3172936716243804, "grad_norm": 0.19772620499134064, "learning_rate": 1e-06, "loss": -0.0777, "step": 564 }, { "clip_ratio/high_max": 0.002451934364216868, "clip_ratio/high_mean": 0.0010848782130779, "clip_ratio/low_mean": 0.0007178839987318497, "clip_ratio/low_min": 3.587527407944435e-05, "clip_ratio/region_mean": 0.0018027622136287391, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 1258.602783203125, "completions/mean_terminated_length": 701.7303466796875, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 1.3196267133275008, "grad_norm": 0.27217230200767517, "learning_rate": 1e-06, "loss": -0.0614, "num_tokens": 88633672.0, "reward": 0.4832589626312256, "reward_std": 0.20850147306919098, "rewards/verify_math_reward/mean": 0.4832589328289032, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 565 }, { "clip_ratio/high_max": 0.002639128564624116, "clip_ratio/high_mean": 0.0012068962369085057, "clip_ratio/low_mean": 0.0009106867146329023, "clip_ratio/low_min": 4.678676941693993e-05, "clip_ratio/region_mean": 0.002117582960636355, "epoch": 1.3219597550306212, "grad_norm": 0.34635865688323975, "learning_rate": 1e-06, "loss": -0.0619, "step": 566 }, { "clip_ratio/high_max": 0.0026879405486397445, "clip_ratio/high_mean": 0.0012492795940488577, "clip_ratio/low_mean": 0.0011461309823062038, "clip_ratio/low_min": 7.17505481588887e-05, "clip_ratio/region_mean": 0.0023954105636221357, "epoch": 1.3242927967337417, "grad_norm": 0.2328748255968094, "learning_rate": 1e-06, "loss": -0.0622, "step": 567 }, { "clip_ratio/high_max": 0.0028468191012507305, "clip_ratio/high_mean": 0.0012138105084886774, "clip_ratio/low_mean": 0.0013346866235224297, "clip_ratio/low_min": 6.643166852882132e-05, "clip_ratio/region_mean": 0.0025484971702098846, "epoch": 1.326625838436862, "grad_norm": 0.25093185901641846, "learning_rate": 1e-06, "loss": -0.0622, "step": 568 }, { "clip_ratio/high_max": 0.0027055971549998503, "clip_ratio/high_mean": 0.0010821358318935381, "clip_ratio/low_mean": 0.0007431913873006124, "clip_ratio/low_min": 8.24320795800304e-06, "clip_ratio/region_mean": 0.0018253271919093095, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3271.0, "completions/mean_length": 1155.7467041015625, "completions/mean_terminated_length": 696.687744140625, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 1.3289588801399825, "grad_norm": 0.3302409052848816, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 89265173.0, "reward": 0.5033482313156128, "reward_std": 0.2006521373987198, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 569 }, { "clip_ratio/high_max": 0.002694445545785129, "clip_ratio/high_mean": 0.0011429381265770644, "clip_ratio/low_mean": 0.0008473033649352146, "clip_ratio/low_min": 1.648641591600608e-05, "clip_ratio/region_mean": 0.001990241500607226, "epoch": 1.331291921843103, "grad_norm": 0.2360229790210724, "learning_rate": 1e-06, "loss": -0.0414, "step": 570 }, { "clip_ratio/high_max": 0.0027527334459591657, "clip_ratio/high_mean": 0.001195461634779349, "clip_ratio/low_mean": 0.0010750562105386052, "clip_ratio/low_min": 3.5036962799495086e-05, "clip_ratio/region_mean": 0.0022705178344040178, "epoch": 1.3336249635462234, "grad_norm": 0.22802916169166565, "learning_rate": 1e-06, "loss": -0.0416, "step": 571 }, { "clip_ratio/high_max": 0.0030087616105447523, "clip_ratio/high_mean": 0.0012076020648237318, "clip_ratio/low_mean": 0.0012827374539483571, "clip_ratio/low_min": 4.134225127927493e-05, "clip_ratio/region_mean": 0.002490339509677142, "epoch": 1.3359580052493438, "grad_norm": 0.217322438955307, "learning_rate": 1e-06, "loss": -0.0416, "step": 572 }, { "clip_ratio/high_max": 0.0022433050035033375, "clip_ratio/high_mean": 0.0009794376092031598, "clip_ratio/low_mean": 0.0006091834147810005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001588621027622139, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3822.0, "completions/mean_length": 1230.3616943359375, "completions/mean_terminated_length": 735.2513427734375, "completions/min_length": 213.0, "completions/min_terminated_length": 213.0, "epoch": 1.3382910469524643, "grad_norm": 0.49397042393684387, "learning_rate": 1e-06, "loss": -0.0519, "num_tokens": 89923121.0, "reward": 0.5301339626312256, "reward_std": 0.1986968070268631, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 573 }, { "clip_ratio/high_max": 0.002747394915786572, "clip_ratio/high_mean": 0.0010957300728478003, "clip_ratio/low_mean": 0.0008107475587166846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019064776352024637, "epoch": 1.3406240886555847, "grad_norm": 2.2493412494659424, "learning_rate": 1e-06, "loss": -0.0519, "step": 574 }, { "clip_ratio/high_max": 0.002587171853519976, "clip_ratio/high_mean": 0.0010944724490400404, "clip_ratio/low_mean": 0.0009208672545355512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020153397126705386, "epoch": 1.3429571303587051, "grad_norm": 0.26067569851875305, "learning_rate": 1e-06, "loss": -0.0521, "step": 575 }, { "clip_ratio/high_max": 0.0031339259003289044, "clip_ratio/high_mean": 0.0013005566434003413, "clip_ratio/low_mean": 0.001055819375324063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002356376040552277, "epoch": 1.3452901720618256, "grad_norm": 0.2161387801170349, "learning_rate": 1e-06, "loss": -0.0523, "step": 576 }, { "clip_ratio/high_max": 0.0019980782526545227, "clip_ratio/high_mean": 0.00078752060107945, "clip_ratio/low_mean": 0.0007162734063967946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015037939811008982, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 1143.196533203125, "completions/mean_terminated_length": 628.487548828125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 1.347623213764946, "grad_norm": 0.27860888838768005, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 90489313.0, "reward": 0.5446428656578064, "reward_std": 0.1825447827577591, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 577 }, { "clip_ratio/high_max": 0.002596547157736495, "clip_ratio/high_mean": 0.00102948046696838, "clip_ratio/low_mean": 0.0010464444894751068, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002075924989185296, "epoch": 1.3499562554680664, "grad_norm": 0.22397665679454803, "learning_rate": 1e-06, "loss": -0.0668, "step": 578 }, { "clip_ratio/high_max": 0.0026784937508637086, "clip_ratio/high_mean": 0.001074605042958865, "clip_ratio/low_mean": 0.0009877793781924993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020623844538931735, "epoch": 1.352289297171187, "grad_norm": 0.31049689650535583, "learning_rate": 1e-06, "loss": -0.0668, "step": 579 }, { "clip_ratio/high_max": 0.002884976100176573, "clip_ratio/high_mean": 0.0010623514790495392, "clip_ratio/low_mean": 0.001206621149322018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022689726611133665, "epoch": 1.3546223388743073, "grad_norm": 0.2444876730442047, "learning_rate": 1e-06, "loss": -0.067, "step": 580 }, { "clip_ratio/high_max": 0.002597758488263935, "clip_ratio/high_mean": 0.0009088348488148768, "clip_ratio/low_mean": 0.0005896810807826114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001498515957791824, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3535.0, "completions/mean_length": 1151.1429443359375, "completions/mean_terminated_length": 673.70166015625, "completions/min_length": 202.0, "completions/min_terminated_length": 202.0, "epoch": 1.356955380577428, "grad_norm": 0.28441837430000305, "learning_rate": 1e-06, "loss": -0.0517, "num_tokens": 91112121.0, "reward": 0.5446428656578064, "reward_std": 0.1658935248851776, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 581 }, { "clip_ratio/high_max": 0.002849523712939117, "clip_ratio/high_mean": 0.0010559622733126162, "clip_ratio/low_mean": 0.0008172052166628418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018731674645096064, "epoch": 1.3592884222805481, "grad_norm": 0.2312406301498413, "learning_rate": 1e-06, "loss": -0.0519, "step": 582 }, { "clip_ratio/high_max": 0.0032241656372207217, "clip_ratio/high_mean": 0.0010953661039820872, "clip_ratio/low_mean": 0.000993555158856907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020889212464680895, "epoch": 1.3616214639836688, "grad_norm": 0.21330702304840088, "learning_rate": 1e-06, "loss": -0.0521, "step": 583 }, { "clip_ratio/high_max": 0.0034261067994521, "clip_ratio/high_mean": 0.0012234174682816956, "clip_ratio/low_mean": 0.0011079603573307395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023313778146984987, "epoch": 1.3639545056867892, "grad_norm": 0.21820618212223053, "learning_rate": 1e-06, "loss": -0.0522, "step": 584 }, { "clip_ratio/high_max": 0.0022309218256850727, "clip_ratio/high_mean": 0.0009113126070587896, "clip_ratio/low_mean": 0.0006186518075992353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015299644364858977, "completions/clipped_ratio": 0.1808035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 1337.560302734375, "completions/mean_terminated_length": 728.7493286132812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.3662875473899097, "grad_norm": 0.4516977369785309, "learning_rate": 1e-06, "loss": -0.0677, "num_tokens": 91734631.0, "reward": 0.4966517984867096, "reward_std": 0.20636461675167084, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 585 }, { "clip_ratio/high_max": 0.002618747326778248, "clip_ratio/high_mean": 0.001014440991639276, "clip_ratio/low_mean": 0.0008469320109725231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001861373006249778, "epoch": 1.36862058909303, "grad_norm": 0.3494608402252197, "learning_rate": 1e-06, "loss": -0.068, "step": 586 }, { "clip_ratio/high_max": 0.0027209750187466852, "clip_ratio/high_mean": 0.0011050710490962956, "clip_ratio/low_mean": 0.0010038205618911888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002108891618263442, "epoch": 1.3709536307961505, "grad_norm": 0.23081910610198975, "learning_rate": 1e-06, "loss": -0.0681, "step": 587 }, { "clip_ratio/high_max": 0.002853392194083426, "clip_ratio/high_mean": 0.0010605590832710732, "clip_ratio/low_mean": 0.0011615040129981935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022220630635274574, "epoch": 1.373286672499271, "grad_norm": 0.21421265602111816, "learning_rate": 1e-06, "loss": -0.0683, "step": 588 }, { "clip_ratio/high_max": 0.0026516875805100426, "clip_ratio/high_mean": 0.001054480593666085, "clip_ratio/low_mean": 0.0007563284561911132, "clip_ratio/low_min": 7.207330872915918e-05, "clip_ratio/region_mean": 0.0018108089934685268, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 1241.4085693359375, "completions/mean_terminated_length": 699.3014526367188, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 1.3756197142023914, "grad_norm": 0.28075045347213745, "learning_rate": 1e-06, "loss": -0.0893, "num_tokens": 92344653.0, "reward": 0.5212053656578064, "reward_std": 0.2188381552696228, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 589 }, { "clip_ratio/high_max": 0.0029952514087199233, "clip_ratio/high_mean": 0.001245402017957531, "clip_ratio/low_mean": 0.0008863159619068028, "clip_ratio/low_min": 1.5082046047609765e-05, "clip_ratio/region_mean": 0.0021317179343895987, "epoch": 1.3779527559055118, "grad_norm": 0.24631904065608978, "learning_rate": 1e-06, "loss": -0.0896, "step": 590 }, { "clip_ratio/high_max": 0.003006512917636428, "clip_ratio/high_mean": 0.0012828278522647452, "clip_ratio/low_mean": 0.0010212881079496583, "clip_ratio/low_min": 8.188196079572663e-05, "clip_ratio/region_mean": 0.0023041159511194564, "epoch": 1.3802857976086322, "grad_norm": 0.25548332929611206, "learning_rate": 1e-06, "loss": -0.0898, "step": 591 }, { "clip_ratio/high_max": 0.003201304869435262, "clip_ratio/high_mean": 0.0013419906681519933, "clip_ratio/low_mean": 0.001210546717629768, "clip_ratio/low_min": 8.185838123608846e-05, "clip_ratio/region_mean": 0.002552537356677931, "epoch": 1.3826188393117527, "grad_norm": 0.21688659489154816, "learning_rate": 1e-06, "loss": -0.0899, "step": 592 }, { "clip_ratio/high_max": 0.0021212757128523663, "clip_ratio/high_mean": 0.0009215079808200244, "clip_ratio/low_mean": 0.0007629704941791715, "clip_ratio/low_min": 4.211662962916307e-05, "clip_ratio/region_mean": 0.0016844784695422277, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 1055.1239013671875, "completions/mean_terminated_length": 647.1076049804688, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 1.384951881014873, "grad_norm": 0.2931683659553528, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 92940492.0, "reward": 0.559151828289032, "reward_std": 0.20058180391788483, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 593 }, { "clip_ratio/high_max": 0.00278607607469894, "clip_ratio/high_mean": 0.0012522304295998765, "clip_ratio/low_mean": 0.0009870441972452682, "clip_ratio/low_min": 1.60503332153894e-05, "clip_ratio/region_mean": 0.0022392746432160493, "epoch": 1.3872849227179935, "grad_norm": 0.22460319101810455, "learning_rate": 1e-06, "loss": -0.0693, "step": 594 }, { "clip_ratio/high_max": 0.0025723364087753, "clip_ratio/high_mean": 0.001167529693702818, "clip_ratio/low_mean": 0.0011208264222659636, "clip_ratio/low_min": 3.670015212264843e-05, "clip_ratio/region_mean": 0.002288356095959898, "epoch": 1.389617964421114, "grad_norm": 0.2121608704328537, "learning_rate": 1e-06, "loss": -0.0695, "step": 595 }, { "clip_ratio/high_max": 0.0026881626690737903, "clip_ratio/high_mean": 0.0012027653538098093, "clip_ratio/low_mean": 0.001383271490340121, "clip_ratio/low_min": 4.4618191168410704e-05, "clip_ratio/region_mean": 0.002586036855063867, "epoch": 1.3919510061242344, "grad_norm": 0.2061060667037964, "learning_rate": 1e-06, "loss": -0.0696, "step": 596 }, { "clip_ratio/high_max": 0.0021780979295726866, "clip_ratio/high_mean": 0.0008328158983204048, "clip_ratio/low_mean": 0.000534393328052829, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013672092281922232, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 1141.454345703125, "completions/mean_terminated_length": 666.889892578125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 1.3942840478273548, "grad_norm": 0.24844984710216522, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 93551587.0, "reward": 0.5926339626312256, "reward_std": 0.14676952362060547, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 597 }, { "clip_ratio/high_max": 0.002660203506820835, "clip_ratio/high_mean": 0.0010128232497663703, "clip_ratio/low_mean": 0.0007439737037202576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017567969771334901, "epoch": 1.3966170895304755, "grad_norm": 0.2034914344549179, "learning_rate": 1e-06, "loss": -0.056, "step": 598 }, { "clip_ratio/high_max": 0.0026001208971138112, "clip_ratio/high_mean": 0.0009838145906542195, "clip_ratio/low_mean": 0.0009370623502036324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019208769663237035, "epoch": 1.3989501312335957, "grad_norm": 0.19510072469711304, "learning_rate": 1e-06, "loss": -0.0561, "step": 599 }, { "clip_ratio/high_max": 0.0028433833067538217, "clip_ratio/high_mean": 0.0010893139951804187, "clip_ratio/low_mean": 0.0010003991046687588, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020897130962111987, "epoch": 1.4012831729367163, "grad_norm": 0.20516419410705566, "learning_rate": 1e-06, "loss": -0.0562, "step": 600 }, { "clip_ratio/high_max": 0.0019875625002896413, "clip_ratio/high_mean": 0.0007778587914799573, "clip_ratio/low_mean": 0.0005160563669051044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012939151492901146, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3730.0, "completions/mean_length": 1200.193115234375, "completions/mean_terminated_length": 668.46630859375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.4036162146398368, "grad_norm": 0.2528442442417145, "learning_rate": 1e-06, "loss": -0.0485, "num_tokens": 94154816.0, "reward": 0.551339328289032, "reward_std": 0.1614226996898651, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 601 }, { "clip_ratio/high_max": 0.002704072314372752, "clip_ratio/high_mean": 0.0009764729602466105, "clip_ratio/low_mean": 0.0007228711510833818, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016993441022350453, "epoch": 1.4059492563429572, "grad_norm": 0.20775359869003296, "learning_rate": 1e-06, "loss": -0.0487, "step": 602 }, { "clip_ratio/high_max": 0.003099346846283879, "clip_ratio/high_mean": 0.0010006036827689968, "clip_ratio/low_mean": 0.0008107311459752964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018113348705810495, "epoch": 1.4082822980460776, "grad_norm": 0.2684023082256317, "learning_rate": 1e-06, "loss": -0.0488, "step": 603 }, { "clip_ratio/high_max": 0.002917083569627721, "clip_ratio/high_mean": 0.0009770667002158007, "clip_ratio/low_mean": 0.0008976379776868271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018747046633507125, "epoch": 1.410615339749198, "grad_norm": 0.2831137776374817, "learning_rate": 1e-06, "loss": -0.0489, "step": 604 }, { "clip_ratio/high_max": 0.002650014925166033, "clip_ratio/high_mean": 0.0010226274644082878, "clip_ratio/low_mean": 0.0006543864346895134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016770138972788118, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2470.0, "completions/mean_length": 1024.3035888671875, "completions/mean_terminated_length": 629.7027587890625, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 1.4129483814523185, "grad_norm": 0.2720213830471039, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 94739280.0, "reward": 0.6339285969734192, "reward_std": 0.19201312959194183, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 605 }, { "clip_ratio/high_max": 0.0032250007934635505, "clip_ratio/high_mean": 0.001298104503803188, "clip_ratio/low_mean": 0.0007941329222376226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020922373951179907, "epoch": 1.415281423155439, "grad_norm": 0.23052063584327698, "learning_rate": 1e-06, "loss": -0.065, "step": 606 }, { "clip_ratio/high_max": 0.0033462921128375456, "clip_ratio/high_mean": 0.001247998770850245, "clip_ratio/low_mean": 0.0009523386815999402, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022003374760970473, "epoch": 1.4176144648585594, "grad_norm": 0.2190183401107788, "learning_rate": 1e-06, "loss": -0.0651, "step": 607 }, { "clip_ratio/high_max": 0.0036054849551874213, "clip_ratio/high_mean": 0.0013499719243554864, "clip_ratio/low_mean": 0.0010871795147977537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002437151415506378, "epoch": 1.4199475065616798, "grad_norm": 0.2035127729177475, "learning_rate": 1e-06, "loss": -0.0653, "step": 608 }, { "clip_ratio/high_max": 0.0021020623680669814, "clip_ratio/high_mean": 0.0008694770876900293, "clip_ratio/low_mean": 0.00026096853753188043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011304455874778796, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3702.0, "completions/mean_length": 1227.6015625, "completions/mean_terminated_length": 650.8458862304688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.4222805482648002, "grad_norm": 0.22861987352371216, "learning_rate": 1e-06, "loss": -0.0823, "num_tokens": 95314035.0, "reward": 0.535714328289032, "reward_std": 0.14969733357429504, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 609 }, { "clip_ratio/high_max": 0.0026625607642927207, "clip_ratio/high_mean": 0.0010112431737070438, "clip_ratio/low_mean": 0.0003479390888969647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001359182278974913, "epoch": 1.4246135899679206, "grad_norm": 0.18182526528835297, "learning_rate": 1e-06, "loss": -0.0824, "step": 610 }, { "clip_ratio/high_max": 0.002436406837659888, "clip_ratio/high_mean": 0.0009892153648252133, "clip_ratio/low_mean": 0.0004508674383032485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014400828003999777, "epoch": 1.426946631671041, "grad_norm": 0.1634424328804016, "learning_rate": 1e-06, "loss": -0.0825, "step": 611 }, { "clip_ratio/high_max": 0.0025214683264493942, "clip_ratio/high_mean": 0.0010336918057873845, "clip_ratio/low_mean": 0.0005079405591459363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001541632373118773, "epoch": 1.4292796733741615, "grad_norm": 0.21305061876773834, "learning_rate": 1e-06, "loss": -0.0825, "step": 612 }, { "clip_ratio/high_max": 0.0015614713047398254, "clip_ratio/high_mean": 0.0005608082774415379, "clip_ratio/low_mean": 0.00039854151555118733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009593498034519143, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 1116.0491943359375, "completions/mean_terminated_length": 677.2599487304688, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 1.431612715077282, "grad_norm": 0.2500576972961426, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 95925503.0, "reward": 0.598214328289032, "reward_std": 0.14782008528709412, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 613 }, { "clip_ratio/high_max": 0.0018235738316434436, "clip_ratio/high_mean": 0.0006969325741010834, "clip_ratio/low_mean": 0.0005571255578615819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012540581592475064, "epoch": 1.4339457567804024, "grad_norm": 0.1800648272037506, "learning_rate": 1e-06, "loss": -0.0611, "step": 614 }, { "clip_ratio/high_max": 0.0021413853246485814, "clip_ratio/high_mean": 0.000794440147728892, "clip_ratio/low_mean": 0.0006279873937273805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014224275400920305, "epoch": 1.436278798483523, "grad_norm": 0.16388677060604095, "learning_rate": 1e-06, "loss": -0.0612, "step": 615 }, { "clip_ratio/high_max": 0.0021281002591422293, "clip_ratio/high_mean": 0.0007327250787056983, "clip_ratio/low_mean": 0.0007049218756947084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001437646998965647, "epoch": 1.4386118401866432, "grad_norm": 0.18357981741428375, "learning_rate": 1e-06, "loss": -0.0612, "step": 616 }, { "clip_ratio/high_max": 0.002533985083573498, "clip_ratio/high_mean": 0.0010205702928942628, "clip_ratio/low_mean": 0.00045894188770034816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014795121751376428, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3327.0, "completions/mean_length": 1042.779052734375, "completions/mean_terminated_length": 628.7173461914062, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.4409448818897639, "grad_norm": 0.3101976811885834, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 96505889.0, "reward": 0.6127232313156128, "reward_std": 0.17435340583324432, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 617 }, { "clip_ratio/high_max": 0.002833166559867095, "clip_ratio/high_mean": 0.0011558647820493206, "clip_ratio/low_mean": 0.0006090293281886261, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017648941284278408, "epoch": 1.443277923592884, "grad_norm": 0.21514618396759033, "learning_rate": 1e-06, "loss": -0.066, "step": 618 }, { "clip_ratio/high_max": 0.002680325436813291, "clip_ratio/high_mean": 0.0011616850970312953, "clip_ratio/low_mean": 0.0007211005502085754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001882785654743202, "epoch": 1.4456109652960047, "grad_norm": 0.20982342958450317, "learning_rate": 1e-06, "loss": -0.0663, "step": 619 }, { "clip_ratio/high_max": 0.003047146776225418, "clip_ratio/high_mean": 0.0012035941836074926, "clip_ratio/low_mean": 0.0008032410696614534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020068352459929883, "epoch": 1.4479440069991252, "grad_norm": 0.19952359795570374, "learning_rate": 1e-06, "loss": -0.0664, "step": 620 }, { "clip_ratio/high_max": 0.0019531819452822674, "clip_ratio/high_mean": 0.0007085624902174459, "clip_ratio/low_mean": 0.0005314311813435779, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012399936640576925, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 1045.125, "completions/mean_terminated_length": 640.1416015625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 1.4502770487022456, "grad_norm": 0.2716374695301056, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 97101545.0, "reward": 0.6049107313156128, "reward_std": 0.14443765580654144, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 621 }, { "clip_ratio/high_max": 0.0022503573090943974, "clip_ratio/high_mean": 0.0007616569109814009, "clip_ratio/low_mean": 0.000754826652155316, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015164835458563175, "epoch": 1.452610090405366, "grad_norm": 0.4154297113418579, "learning_rate": 1e-06, "loss": -0.0444, "step": 622 }, { "clip_ratio/high_max": 0.0022182204884302337, "clip_ratio/high_mean": 0.000823001873868634, "clip_ratio/low_mean": 0.0008748353093324113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016978372041194234, "epoch": 1.4549431321084865, "grad_norm": 0.23099291324615479, "learning_rate": 1e-06, "loss": -0.0446, "step": 623 }, { "clip_ratio/high_max": 0.0023679707519477233, "clip_ratio/high_mean": 0.0007952347023092443, "clip_ratio/low_mean": 0.0010230364714516327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018182711573899724, "epoch": 1.457276173811607, "grad_norm": 0.224510058760643, "learning_rate": 1e-06, "loss": -0.0446, "step": 624 }, { "clip_ratio/high_max": 0.001993513331399299, "clip_ratio/high_mean": 0.0007664889380976092, "clip_ratio/low_mean": 0.0005510996397788404, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001317588616075227, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 1179.724365234375, "completions/mean_terminated_length": 715.6856689453125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.4596092155147273, "grad_norm": 0.253246933221817, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 97746274.0, "reward": 0.5412946939468384, "reward_std": 0.14882969856262207, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 625 }, { "clip_ratio/high_max": 0.0021818488239659928, "clip_ratio/high_mean": 0.0008976585086202249, "clip_ratio/low_mean": 0.0006950817951292265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015927403183013666, "epoch": 1.4619422572178478, "grad_norm": 0.21737965941429138, "learning_rate": 1e-06, "loss": -0.055, "step": 626 }, { "clip_ratio/high_max": 0.0023206977275549434, "clip_ratio/high_mean": 0.0009187137247863575, "clip_ratio/low_mean": 0.0008546652979930514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017733790300553665, "epoch": 1.4642752989209682, "grad_norm": 0.20063091814517975, "learning_rate": 1e-06, "loss": -0.0551, "step": 627 }, { "clip_ratio/high_max": 0.002261175148305483, "clip_ratio/high_mean": 0.0009258877780666808, "clip_ratio/low_mean": 0.0009686157054602518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018945034826174378, "epoch": 1.4666083406240886, "grad_norm": 3.1463029384613037, "learning_rate": 1e-06, "loss": -0.0522, "step": 628 }, { "clip_ratio/high_max": 0.0018675610699574463, "clip_ratio/high_mean": 0.0007878570031607524, "clip_ratio/low_mean": 0.0003884529660354019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011763099864765536, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2726.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 683.4025268554688, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 1.468941382327209, "grad_norm": 1.2238999605178833, "learning_rate": 1e-06, "loss": -0.0621, "num_tokens": 98377018.0, "reward": 0.5859375, "reward_std": 0.14879760146141052, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 629 }, { "clip_ratio/high_max": 0.0019338292331667617, "clip_ratio/high_mean": 0.0007897362338553648, "clip_ratio/low_mean": 0.0005024289375796798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012921651868964545, "epoch": 1.4712744240303295, "grad_norm": 0.19217050075531006, "learning_rate": 1e-06, "loss": -0.0622, "step": 630 }, { "clip_ratio/high_max": 0.002260987417685101, "clip_ratio/high_mean": 0.0009034892136696726, "clip_ratio/low_mean": 0.000600894496528781, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001504383770225104, "epoch": 1.47360746573345, "grad_norm": 0.19369463622570038, "learning_rate": 1e-06, "loss": -0.0623, "step": 631 }, { "clip_ratio/high_max": 0.0024063340097200125, "clip_ratio/high_mean": 0.0009731234022183344, "clip_ratio/low_mean": 0.0006731857693011989, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016463091742480174, "epoch": 1.4759405074365703, "grad_norm": 0.2207399308681488, "learning_rate": 1e-06, "loss": -0.0623, "step": 632 }, { "clip_ratio/high_max": 0.0026869717112276703, "clip_ratio/high_mean": 0.001069670350261731, "clip_ratio/low_mean": 0.0004924804661641247, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015621508209733292, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3534.0, "completions/mean_length": 1177.58935546875, "completions/mean_terminated_length": 726.2886352539062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 1.4782735491396908, "grad_norm": 0.4338073432445526, "learning_rate": 1e-06, "loss": -0.0848, "num_tokens": 99019162.0, "reward": 0.6071428656578064, "reward_std": 0.17566610872745514, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865824937820435, "step": 633 }, { "clip_ratio/high_max": 0.002957262309791986, "clip_ratio/high_mean": 0.0011660223026410677, "clip_ratio/low_mean": 0.0006873298989376053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018533521943027154, "epoch": 1.4806065908428114, "grad_norm": 0.22073587775230408, "learning_rate": 1e-06, "loss": -0.085, "step": 634 }, { "clip_ratio/high_max": 0.0029644661917700432, "clip_ratio/high_mean": 0.0011194617000001017, "clip_ratio/low_mean": 0.000836331739265006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019557934028853197, "epoch": 1.4829396325459316, "grad_norm": 0.4397682249546051, "learning_rate": 1e-06, "loss": -0.0852, "step": 635 }, { "clip_ratio/high_max": 0.003208911555702798, "clip_ratio/high_mean": 0.0012329146047704853, "clip_ratio/low_mean": 0.000887109217728721, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00212002381158527, "epoch": 1.4852726742490523, "grad_norm": 0.2848373055458069, "learning_rate": 1e-06, "loss": -0.0852, "step": 636 }, { "clip_ratio/high_max": 0.001883948021713877, "clip_ratio/high_mean": 0.0008511904670740478, "clip_ratio/low_mean": 0.0006315238370007137, "clip_ratio/low_min": 5.738918753195321e-05, "clip_ratio/region_mean": 0.0014827142767899204, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3527.0, "completions/mean_length": 1167.9107666015625, "completions/mean_terminated_length": 710.7509765625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.4876057159521727, "grad_norm": 0.2635900378227234, "learning_rate": 1e-06, "loss": -0.0601, "num_tokens": 99660098.0, "reward": 0.598214328289032, "reward_std": 0.1885872334241867, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 637 }, { "clip_ratio/high_max": 0.0021781116593047045, "clip_ratio/high_mean": 0.000966718964264146, "clip_ratio/low_mean": 0.0007830999356883694, "clip_ratio/low_min": 1.972984136955347e-05, "clip_ratio/region_mean": 0.0017498188826721162, "epoch": 1.4899387576552932, "grad_norm": 1.5884244441986084, "learning_rate": 1e-06, "loss": -0.0602, "step": 638 }, { "clip_ratio/high_max": 0.0027367837355996016, "clip_ratio/high_mean": 0.001106985415390227, "clip_ratio/low_mean": 0.0008830568021949148, "clip_ratio/low_min": 4.836259540752508e-05, "clip_ratio/region_mean": 0.0019900422339560464, "epoch": 1.4922717993584136, "grad_norm": 0.5398880839347839, "learning_rate": 1e-06, "loss": -0.0603, "step": 639 }, { "clip_ratio/high_max": 0.002245472169306595, "clip_ratio/high_mean": 0.000960655908784247, "clip_ratio/low_mean": 0.0009723521379783051, "clip_ratio/low_min": 7.21482638255111e-05, "clip_ratio/region_mean": 0.0019330080831423402, "epoch": 1.494604841061534, "grad_norm": 0.17548836767673492, "learning_rate": 1e-06, "loss": -0.0605, "step": 640 }, { "clip_ratio/high_max": 0.0024165987852029502, "clip_ratio/high_mean": 0.001128620229792432, "clip_ratio/low_mean": 0.0008026952018553857, "clip_ratio/low_min": 2.4338005459867418e-05, "clip_ratio/region_mean": 0.0019313154116389342, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3311.0, "completions/mean_length": 1188.0625, "completions/mean_terminated_length": 694.54833984375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 1.4969378827646544, "grad_norm": 0.2950807213783264, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 100274514.0, "reward": 0.5792410969734192, "reward_std": 0.21106158196926117, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 641 }, { "clip_ratio/high_max": 0.002922682797361631, "clip_ratio/high_mean": 0.0012372677556413691, "clip_ratio/low_mean": 0.0009788087954802904, "clip_ratio/low_min": 2.3408239940181375e-05, "clip_ratio/region_mean": 0.0022160765365697443, "epoch": 1.4992709244677749, "grad_norm": 1.4709162712097168, "learning_rate": 1e-06, "loss": -0.0573, "step": 642 }, { "clip_ratio/high_max": 0.003014924041053746, "clip_ratio/high_mean": 0.0013231397097115405, "clip_ratio/low_mean": 0.0010460192897880916, "clip_ratio/low_min": 4.450510459719226e-05, "clip_ratio/region_mean": 0.0023691590322414413, "epoch": 1.5016039661708953, "grad_norm": 0.2877597212791443, "learning_rate": 1e-06, "loss": -0.0575, "step": 643 }, { "clip_ratio/high_max": 0.002821007787133567, "clip_ratio/high_mean": 0.0012275290901015978, "clip_ratio/low_mean": 0.0013115402725816239, "clip_ratio/low_min": 3.511235991027206e-05, "clip_ratio/region_mean": 0.002539069304475561, "epoch": 1.5039370078740157, "grad_norm": 0.34267091751098633, "learning_rate": 1e-06, "loss": -0.0576, "step": 644 }, { "clip_ratio/high_max": 0.0023731205365038477, "clip_ratio/high_mean": 0.001024390277962084, "clip_ratio/low_mean": 0.0006500949120891164, "clip_ratio/low_min": 7.204610938060796e-06, "clip_ratio/region_mean": 0.0016744851927796844, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 1142.673095703125, "completions/mean_terminated_length": 729.3574829101562, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 1.5062700495771362, "grad_norm": 0.3838561177253723, "learning_rate": 1e-06, "loss": -0.0728, "num_tokens": 100943229.0, "reward": 0.5223214626312256, "reward_std": 0.21387313306331635, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 645 }, { "clip_ratio/high_max": 0.002689969271159498, "clip_ratio/high_mean": 0.001251271452929359, "clip_ratio/low_mean": 0.0009535128792776959, "clip_ratio/low_min": 7.204610938060796e-06, "clip_ratio/region_mean": 0.0022047843667678535, "epoch": 1.5086030912802566, "grad_norm": 0.329629510641098, "learning_rate": 1e-06, "loss": -0.073, "step": 646 }, { "clip_ratio/high_max": 0.0030532400123775005, "clip_ratio/high_mean": 0.0013139141192368697, "clip_ratio/low_mean": 0.0012079098414687905, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025218239607056603, "epoch": 1.510936132983377, "grad_norm": 1.3134796619415283, "learning_rate": 1e-06, "loss": -0.073, "step": 647 }, { "clip_ratio/high_max": 0.0029804853693349287, "clip_ratio/high_mean": 0.0013024894251429942, "clip_ratio/low_mean": 0.001235449270097888, "clip_ratio/low_min": 7.204610938060796e-06, "clip_ratio/region_mean": 0.002537938635214232, "epoch": 1.5132691746864975, "grad_norm": 0.32426050305366516, "learning_rate": 1e-06, "loss": -0.0733, "step": 648 }, { "clip_ratio/high_max": 0.002436948925605975, "clip_ratio/high_mean": 0.0008990931328298757, "clip_ratio/low_mean": 0.0007103607449607807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016094539132609498, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3683.0, "completions/mean_length": 1126.485595703125, "completions/mean_terminated_length": 667.2821655273438, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 1.5156022163896181, "grad_norm": 0.6749327182769775, "learning_rate": 1e-06, "loss": -0.047, "num_tokens": 101562656.0, "reward": 0.5055803656578064, "reward_std": 0.17757754027843475, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 649 }, { "clip_ratio/high_max": 0.0028321968275122344, "clip_ratio/high_mean": 0.0010125900462298887, "clip_ratio/low_mean": 0.0008382726591662504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018508627108531073, "epoch": 1.5179352580927383, "grad_norm": 0.3969179689884186, "learning_rate": 1e-06, "loss": -0.0472, "step": 650 }, { "clip_ratio/high_max": 0.0029480049561243504, "clip_ratio/high_mean": 0.0010269809317833278, "clip_ratio/low_mean": 0.0010003093266277574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020272902183933184, "epoch": 1.520268299795859, "grad_norm": 0.22816422581672668, "learning_rate": 1e-06, "loss": -0.0474, "step": 651 }, { "clip_ratio/high_max": 0.0028703065690933727, "clip_ratio/high_mean": 0.000996035676507745, "clip_ratio/low_mean": 0.0012484482213039882, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002244483956019394, "epoch": 1.5226013414989792, "grad_norm": 0.20780935883522034, "learning_rate": 1e-06, "loss": -0.0475, "step": 652 }, { "clip_ratio/high_max": 0.002191218580264831, "clip_ratio/high_mean": 0.0006370077280735131, "clip_ratio/low_mean": 0.0005887188963242806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012257266316737514, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 1067.3404541015625, "completions/mean_terminated_length": 673.9583740234375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 1.5249343832020998, "grad_norm": 0.2642171084880829, "learning_rate": 1e-06, "loss": -0.014, "num_tokens": 102190593.0, "reward": 0.5491071939468384, "reward_std": 0.12381298840045929, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 653 }, { "clip_ratio/high_max": 0.002338791331567336, "clip_ratio/high_mean": 0.0007418377654175856, "clip_ratio/low_mean": 0.00081002667593566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015518644286203198, "epoch": 1.52726742490522, "grad_norm": 0.1977003663778305, "learning_rate": 1e-06, "loss": -0.0143, "step": 654 }, { "clip_ratio/high_max": 0.0022030055515642744, "clip_ratio/high_mean": 0.000714083582352032, "clip_ratio/low_mean": 0.0008968455263129727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016109291045722784, "epoch": 1.5296004666083407, "grad_norm": 0.19976159930229187, "learning_rate": 1e-06, "loss": -0.0143, "step": 655 }, { "clip_ratio/high_max": 0.002761727104370948, "clip_ratio/high_mean": 0.0007561589045508299, "clip_ratio/low_mean": 0.0009836620192800183, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017398208728991449, "epoch": 1.531933508311461, "grad_norm": 0.44394370913505554, "learning_rate": 1e-06, "loss": -0.0144, "step": 656 }, { "clip_ratio/high_max": 0.002771669715002645, "clip_ratio/high_mean": 0.0009958619666576851, "clip_ratio/low_mean": 0.0005350391102183494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015309010705095716, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 1256.4754638671875, "completions/mean_terminated_length": 703.7146606445312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.5342665500145816, "grad_norm": 0.3369171619415283, "learning_rate": 1e-06, "loss": -0.0503, "num_tokens": 102810267.0, "reward": 0.5390625, "reward_std": 0.16378918290138245, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 657 }, { "clip_ratio/high_max": 0.00277820517658256, "clip_ratio/high_mean": 0.0011231052267248742, "clip_ratio/low_mean": 0.0007417504630211624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001864855657913722, "epoch": 1.536599591717702, "grad_norm": 0.24442297220230103, "learning_rate": 1e-06, "loss": -0.0506, "step": 658 }, { "clip_ratio/high_max": 0.0029022171729593538, "clip_ratio/high_mean": 0.001111861605750164, "clip_ratio/low_mean": 0.0007777120290484163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018895736793638207, "epoch": 1.5389326334208224, "grad_norm": 0.2545433044433594, "learning_rate": 1e-06, "loss": -0.0507, "step": 659 }, { "clip_ratio/high_max": 0.0032724814445828088, "clip_ratio/high_mean": 0.0011937184426642489, "clip_ratio/low_mean": 0.0008917640434447094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020854824979323894, "epoch": 1.5412656751239429, "grad_norm": 0.22315526008605957, "learning_rate": 1e-06, "loss": -0.0508, "step": 660 }, { "clip_ratio/high_max": 0.0022636907124251593, "clip_ratio/high_mean": 0.0007799373834131984, "clip_ratio/low_mean": 0.0005420454330078428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001321982799709076, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2809.0, "completions/mean_length": 1014.2344360351562, "completions/mean_terminated_length": 609.55810546875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.5435987168270633, "grad_norm": 0.25587037205696106, "learning_rate": 1e-06, "loss": -0.0497, "num_tokens": 103386597.0, "reward": 0.5323660969734192, "reward_std": 0.13782815635204315, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 661 }, { "clip_ratio/high_max": 0.0026036134440801106, "clip_ratio/high_mean": 0.0009059934709512163, "clip_ratio/low_mean": 0.0008013792884185023, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017073727794922888, "epoch": 1.5459317585301837, "grad_norm": 0.20454640686511993, "learning_rate": 1e-06, "loss": -0.0498, "step": 662 }, { "clip_ratio/high_max": 0.0030446366872638464, "clip_ratio/high_mean": 0.000996074959402904, "clip_ratio/low_mean": 0.0008478511115299625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001843926052970346, "epoch": 1.5482648002333042, "grad_norm": 0.20069797337055206, "learning_rate": 1e-06, "loss": -0.05, "step": 663 }, { "clip_ratio/high_max": 0.002869484364055097, "clip_ratio/high_mean": 0.0009581043177604442, "clip_ratio/low_mean": 0.0010110474136126868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019691517154569738, "epoch": 1.5505978419364246, "grad_norm": 0.18406644463539124, "learning_rate": 1e-06, "loss": -0.0501, "step": 664 }, { "clip_ratio/high_max": 0.0022474951983895153, "clip_ratio/high_mean": 0.0009387328645971138, "clip_ratio/low_mean": 0.0005979755378575646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001536708397907205, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3656.0, "completions/mean_length": 1132.974365234375, "completions/mean_terminated_length": 670.3600463867188, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 1.552930883639545, "grad_norm": 0.30719760060310364, "learning_rate": 1e-06, "loss": -0.0642, "num_tokens": 103989030.0, "reward": 0.5881696939468384, "reward_std": 0.1997455656528473, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 665 }, { "clip_ratio/high_max": 0.002812632519635372, "clip_ratio/high_mean": 0.001159242594440002, "clip_ratio/low_mean": 0.0007812552730683819, "clip_ratio/low_min": 1.321911986451596e-05, "clip_ratio/region_mean": 0.001940497852046974, "epoch": 1.5552639253426657, "grad_norm": 0.2733432650566101, "learning_rate": 1e-06, "loss": -0.0645, "step": 666 }, { "clip_ratio/high_max": 0.002874612000596244, "clip_ratio/high_mean": 0.0011683117336360738, "clip_ratio/low_mean": 0.0009152647562586935, "clip_ratio/low_min": 1.628452264412772e-05, "clip_ratio/region_mean": 0.002083576546283439, "epoch": 1.5575969670457859, "grad_norm": 0.2152608335018158, "learning_rate": 1e-06, "loss": -0.0647, "step": 667 }, { "clip_ratio/high_max": 0.002648581357789226, "clip_ratio/high_mean": 0.0010980963616020745, "clip_ratio/low_mean": 0.0011179804241692182, "clip_ratio/low_min": 1.628452264412772e-05, "clip_ratio/region_mean": 0.0022160768348840065, "epoch": 1.5599300087489065, "grad_norm": 0.32514089345932007, "learning_rate": 1e-06, "loss": -0.0647, "step": 668 }, { "clip_ratio/high_max": 0.002597786580736283, "clip_ratio/high_mean": 0.0009842641156865284, "clip_ratio/low_mean": 0.00060048932027712, "clip_ratio/low_min": 3.234152609365992e-05, "clip_ratio/region_mean": 0.0015847534232307225, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 1235.4989013671875, "completions/mean_terminated_length": 714.7216796875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.5622630504520267, "grad_norm": 0.2672403156757355, "learning_rate": 1e-06, "loss": -0.0751, "num_tokens": 104615133.0, "reward": 0.551339328289032, "reward_std": 0.18881364166736603, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 669 }, { "clip_ratio/high_max": 0.0032760405956651084, "clip_ratio/high_mean": 0.0012572851592267398, "clip_ratio/low_mean": 0.0007416671396640595, "clip_ratio/low_min": 2.696289993764367e-05, "clip_ratio/region_mean": 0.0019989522988907993, "epoch": 1.5645960921551474, "grad_norm": 0.21650661528110504, "learning_rate": 1e-06, "loss": -0.0753, "step": 670 }, { "clip_ratio/high_max": 0.002967457963677589, "clip_ratio/high_mean": 0.0012099871491955128, "clip_ratio/low_mean": 0.0009079160136025166, "clip_ratio/low_min": 2.696289993764367e-05, "clip_ratio/region_mean": 0.002117903139151167, "epoch": 1.5669291338582676, "grad_norm": 0.4154553711414337, "learning_rate": 1e-06, "loss": -0.0754, "step": 671 }, { "clip_ratio/high_max": 0.0030442417264566757, "clip_ratio/high_mean": 0.0012472419948608149, "clip_ratio/low_mean": 0.0010560988521319814, "clip_ratio/low_min": 3.234152609365992e-05, "clip_ratio/region_mean": 0.0023033408651826903, "epoch": 1.5692621755613883, "grad_norm": 0.2203279733657837, "learning_rate": 1e-06, "loss": -0.0754, "step": 672 }, { "clip_ratio/high_max": 0.002457397185935406, "clip_ratio/high_mean": 0.001032453332300065, "clip_ratio/low_mean": 0.0006016580900904955, "clip_ratio/low_min": 1.7356289390590973e-05, "clip_ratio/region_mean": 0.0016341114460374229, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3909.0, "completions/mean_length": 1224.646240234375, "completions/mean_terminated_length": 647.2962646484375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 1.5715952172645085, "grad_norm": 0.30400481820106506, "learning_rate": 1e-06, "loss": -0.0603, "num_tokens": 105185816.0, "reward": 0.578125, "reward_std": 0.18028777837753296, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 673 }, { "clip_ratio/high_max": 0.0026887204803642817, "clip_ratio/high_mean": 0.0011710413582477486, "clip_ratio/low_mean": 0.0008216320393330534, "clip_ratio/low_min": 3.205950270057656e-05, "clip_ratio/region_mean": 0.001992673373024445, "epoch": 1.5739282589676291, "grad_norm": 0.2617240846157074, "learning_rate": 1e-06, "loss": -0.0606, "step": 674 }, { "clip_ratio/high_max": 0.0029412625517579727, "clip_ratio/high_mean": 0.0012388542600092478, "clip_ratio/low_mean": 0.0010205229791608872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022593772373511456, "epoch": 1.5762613006707495, "grad_norm": 0.24544773995876312, "learning_rate": 1e-06, "loss": -0.0607, "step": 675 }, { "clip_ratio/high_max": 0.0028354020178085193, "clip_ratio/high_mean": 0.0012318099470576271, "clip_ratio/low_mean": 0.001147368212514266, "clip_ratio/low_min": 4.8089252231875435e-05, "clip_ratio/region_mean": 0.0023791781277395785, "epoch": 1.57859434237387, "grad_norm": 0.25721755623817444, "learning_rate": 1e-06, "loss": -0.0609, "step": 676 }, { "clip_ratio/high_max": 0.001986108351047733, "clip_ratio/high_mean": 0.000743728496672702, "clip_ratio/low_mean": 0.0005497064330484136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001293434965191409, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 1122.375, "completions/mean_terminated_length": 693.2311401367188, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 1.5809273840769904, "grad_norm": 0.41420695185661316, "learning_rate": 1e-06, "loss": -0.0696, "num_tokens": 105817784.0, "reward": 0.5479910969734192, "reward_std": 0.155902698636055, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 677 }, { "clip_ratio/high_max": 0.0027282146584184375, "clip_ratio/high_mean": 0.0010669030289136572, "clip_ratio/low_mean": 0.0006302873607637594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016971904042293318, "epoch": 1.5832604257801108, "grad_norm": 0.22586533427238464, "learning_rate": 1e-06, "loss": -0.0699, "step": 678 }, { "clip_ratio/high_max": 0.0024417053537035827, "clip_ratio/high_mean": 0.000980118353254511, "clip_ratio/low_mean": 0.0007825901175237959, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017627084016567096, "epoch": 1.5855934674832313, "grad_norm": 0.25418341159820557, "learning_rate": 1e-06, "loss": -0.07, "step": 679 }, { "clip_ratio/high_max": 0.00250152604712639, "clip_ratio/high_mean": 0.0009698775656943326, "clip_ratio/low_mean": 0.0009185781736960053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018884557794081047, "epoch": 1.5879265091863517, "grad_norm": 0.20713964104652405, "learning_rate": 1e-06, "loss": -0.0701, "step": 680 }, { "clip_ratio/high_max": 0.0026368986946181394, "clip_ratio/high_mean": 0.001084289675418404, "clip_ratio/low_mean": 0.000728253367014986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018125430469808634, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 1119.029052734375, "completions/mean_terminated_length": 702.4046020507812, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.5902595508894721, "grad_norm": 23.57373809814453, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 106460962.0, "reward": 0.551339328289032, "reward_std": 0.1847137063741684, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 681 }, { "clip_ratio/high_max": 0.0027478109186631627, "clip_ratio/high_mean": 0.0011969626029895153, "clip_ratio/low_mean": 0.0007512870324717369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001948249657289125, "epoch": 1.5925925925925926, "grad_norm": 5.8721489906311035, "learning_rate": 1e-06, "loss": -0.0504, "step": 682 }, { "clip_ratio/high_max": 0.0025349690185976215, "clip_ratio/high_mean": 0.0010864624637179077, "clip_ratio/low_mean": 0.0009125418055191403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019990042346762493, "epoch": 1.594925634295713, "grad_norm": 0.5025140643119812, "learning_rate": 1e-06, "loss": -0.0507, "step": 683 }, { "clip_ratio/high_max": 0.003364331911143381, "clip_ratio/high_mean": 0.0012594885592989158, "clip_ratio/low_mean": 0.0009790292042453075, "clip_ratio/low_min": 1.3116474292473868e-05, "clip_ratio/region_mean": 0.0022385177871910855, "epoch": 1.5972586759988334, "grad_norm": 0.2561322748661041, "learning_rate": 1e-06, "loss": -0.051, "step": 684 }, { "clip_ratio/high_max": 0.002378983575908933, "clip_ratio/high_mean": 0.0010212671004410367, "clip_ratio/low_mean": 0.0006282021267907112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016494692426931579, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 1127.22998046875, "completions/mean_terminated_length": 690.0870971679688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.599591717701954, "grad_norm": 0.5263251662254333, "learning_rate": 1e-06, "loss": -0.0589, "num_tokens": 107091672.0, "reward": 0.5959821939468384, "reward_std": 0.2022654116153717, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 685 }, { "clip_ratio/high_max": 0.0027533315660548396, "clip_ratio/high_mean": 0.0011889918569067959, "clip_ratio/low_mean": 0.0007750206968921702, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001964012557436945, "epoch": 1.6019247594050743, "grad_norm": 0.2786444127559662, "learning_rate": 1e-06, "loss": -0.0592, "step": 686 }, { "clip_ratio/high_max": 0.0026220103609375656, "clip_ratio/high_mean": 0.001100689551094547, "clip_ratio/low_mean": 0.0009440878502573469, "clip_ratio/low_min": 2.4159257009159774e-05, "clip_ratio/region_mean": 0.0020447773931664415, "epoch": 1.604257801108195, "grad_norm": 0.225407212972641, "learning_rate": 1e-06, "loss": -0.0594, "step": 687 }, { "clip_ratio/high_max": 0.002552107638621237, "clip_ratio/high_mean": 0.001106837185943732, "clip_ratio/low_mean": 0.001083293237570615, "clip_ratio/low_min": 2.4159257009159774e-05, "clip_ratio/region_mean": 0.00219013039895799, "epoch": 1.6065908428113151, "grad_norm": 0.22912397980690002, "learning_rate": 1e-06, "loss": -0.0595, "step": 688 }, { "clip_ratio/high_max": 0.001984551447094418, "clip_ratio/high_mean": 0.0008755589569773292, "clip_ratio/low_mean": 0.0006188168863445753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014943758433219045, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2697.0, "completions/mean_length": 1034.4710693359375, "completions/mean_terminated_length": 623.6835327148438, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 1.6089238845144358, "grad_norm": 0.3455251157283783, "learning_rate": 1e-06, "loss": -0.059, "num_tokens": 107664830.0, "reward": 0.5970982313156128, "reward_std": 0.1634196788072586, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 689 }, { "clip_ratio/high_max": 0.0026572456481517293, "clip_ratio/high_mean": 0.0010708043300837744, "clip_ratio/low_mean": 0.0008035079936234979, "clip_ratio/low_min": 1.6348418284906074e-05, "clip_ratio/region_mean": 0.0018743123437161557, "epoch": 1.611256926217556, "grad_norm": 0.39202776551246643, "learning_rate": 1e-06, "loss": -0.0592, "step": 690 }, { "clip_ratio/high_max": 0.002687043663172517, "clip_ratio/high_mean": 0.0010083333672810113, "clip_ratio/low_mean": 0.0009298997329096892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019382330574444495, "epoch": 1.6135899679206767, "grad_norm": 0.21114280819892883, "learning_rate": 1e-06, "loss": -0.0589, "step": 691 }, { "clip_ratio/high_max": 0.002561726636486128, "clip_ratio/high_mean": 0.0010162810522160726, "clip_ratio/low_mean": 0.001086448528440087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021027295806561597, "epoch": 1.6159230096237969, "grad_norm": 0.20914065837860107, "learning_rate": 1e-06, "loss": -0.0594, "step": 692 }, { "clip_ratio/high_max": 0.002448424231261015, "clip_ratio/high_mean": 0.0010212116776529, "clip_ratio/low_mean": 0.0006204062156029977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016416179278166965, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3266.0, "completions/mean_length": 1040.48779296875, "completions/mean_terminated_length": 647.9659423828125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.6182560513269175, "grad_norm": 0.32229918241500854, "learning_rate": 1e-06, "loss": -0.0554, "num_tokens": 108269627.0, "reward": 0.5703125, "reward_std": 0.1830301135778427, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 693 }, { "clip_ratio/high_max": 0.0025403390682186, "clip_ratio/high_mean": 0.0010841919374797726, "clip_ratio/low_mean": 0.0008559281086490955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001940120018844027, "epoch": 1.620589093030038, "grad_norm": 0.23361806571483612, "learning_rate": 1e-06, "loss": -0.0558, "step": 694 }, { "clip_ratio/high_max": 0.002476399102306459, "clip_ratio/high_mean": 0.0012197207215649541, "clip_ratio/low_mean": 0.0009554794178256998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002175200126657728, "epoch": 1.6229221347331584, "grad_norm": 0.23735173046588898, "learning_rate": 1e-06, "loss": -0.0558, "step": 695 }, { "clip_ratio/high_max": 0.003198175967554562, "clip_ratio/high_mean": 0.0012350379438430537, "clip_ratio/low_mean": 0.0011255836852797074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023606216636835597, "epoch": 1.6252551764362788, "grad_norm": 0.22560842335224152, "learning_rate": 1e-06, "loss": -0.0559, "step": 696 }, { "clip_ratio/high_max": 0.001656759857723955, "clip_ratio/high_mean": 0.0006567530836036894, "clip_ratio/low_mean": 0.000423733228672063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001080486301361816, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 1133.9107666015625, "completions/mean_terminated_length": 697.7515869140625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 1.6275882181393992, "grad_norm": 0.25505128502845764, "learning_rate": 1e-06, "loss": -0.0508, "num_tokens": 108922939.0, "reward": 0.5290178656578064, "reward_std": 0.15135477483272552, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 697 }, { "clip_ratio/high_max": 0.0021531387319555506, "clip_ratio/high_mean": 0.0008437033466179855, "clip_ratio/low_mean": 0.0005505627250386169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013942660843895283, "epoch": 1.6299212598425197, "grad_norm": 0.17824582755565643, "learning_rate": 1e-06, "loss": -0.051, "step": 698 }, { "clip_ratio/high_max": 0.00202299389638938, "clip_ratio/high_mean": 0.0008497606831951998, "clip_ratio/low_mean": 0.0006875479039081256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015373086316685658, "epoch": 1.63225430154564, "grad_norm": 0.2280644178390503, "learning_rate": 1e-06, "loss": -0.051, "step": 699 }, { "clip_ratio/high_max": 0.0019253981590736657, "clip_ratio/high_mean": 0.0007718354845565045, "clip_ratio/low_mean": 0.0007603771891808719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001532212660094956, "epoch": 1.6345873432487605, "grad_norm": 0.201303631067276, "learning_rate": 1e-06, "loss": -0.0511, "step": 700 }, { "clip_ratio/high_max": 0.0025552319129928946, "clip_ratio/high_mean": 0.0008661330266477307, "clip_ratio/low_mean": 0.0006004089000271051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001466541947593214, "completions/clipped_ratio": 0.1774553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 1311.7801513671875, "completions/mean_terminated_length": 711.114013671875, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 1.636920384951881, "grad_norm": 0.30581483244895935, "learning_rate": 1e-06, "loss": -0.0466, "num_tokens": 109537334.0, "reward": 0.5412946939468384, "reward_std": 0.1721000224351883, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 701 }, { "clip_ratio/high_max": 0.003085017582634464, "clip_ratio/high_mean": 0.001095774077839451, "clip_ratio/low_mean": 0.0007239052683871705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018196793243987486, "epoch": 1.6392534266550016, "grad_norm": 0.22841838002204895, "learning_rate": 1e-06, "loss": -0.0468, "step": 702 }, { "clip_ratio/high_max": 0.0033996423735516146, "clip_ratio/high_mean": 0.0011916830226255115, "clip_ratio/low_mean": 0.001012575596178067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022042586642783135, "epoch": 1.6415864683581218, "grad_norm": 0.2255060076713562, "learning_rate": 1e-06, "loss": -0.0471, "step": 703 }, { "clip_ratio/high_max": 0.0030149108133628033, "clip_ratio/high_mean": 0.0010909341071965173, "clip_ratio/low_mean": 0.0010709714697441086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021619055696646683, "epoch": 1.6439195100612425, "grad_norm": 0.22758369147777557, "learning_rate": 1e-06, "loss": -0.0471, "step": 704 }, { "clip_ratio/high_max": 0.002085322768834885, "clip_ratio/high_mean": 0.0008085589088295819, "clip_ratio/low_mean": 0.0004991260957467603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013076850045763422, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 1237.099365234375, "completions/mean_terminated_length": 662.2533569335938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 1.6462525517643627, "grad_norm": 0.347273051738739, "learning_rate": 1e-06, "loss": -0.0336, "num_tokens": 110121991.0, "reward": 0.5602678656578064, "reward_std": 0.14504244923591614, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 705 }, { "clip_ratio/high_max": 0.0026197285260423087, "clip_ratio/high_mean": 0.0009480943353992188, "clip_ratio/low_mean": 0.00072898269900179, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016770770380389877, "epoch": 1.6485855934674833, "grad_norm": 0.2560538053512573, "learning_rate": 1e-06, "loss": -0.0339, "step": 706 }, { "clip_ratio/high_max": 0.0027566683493205346, "clip_ratio/high_mean": 0.000984727008471964, "clip_ratio/low_mean": 0.0008359698204003507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018206968015874736, "epoch": 1.6509186351706036, "grad_norm": 0.23758842051029205, "learning_rate": 1e-06, "loss": -0.034, "step": 707 }, { "clip_ratio/high_max": 0.002730562016949989, "clip_ratio/high_mean": 0.0008625241007393925, "clip_ratio/low_mean": 0.0010405926041130442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019031167175853625, "epoch": 1.6532516768737242, "grad_norm": 0.2221532016992569, "learning_rate": 1e-06, "loss": -0.0341, "step": 708 }, { "clip_ratio/high_max": 0.002698038784728851, "clip_ratio/high_mean": 0.0010320206092728768, "clip_ratio/low_mean": 0.0005404828734754119, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015725034827482887, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 1144.90966796875, "completions/mean_terminated_length": 648.5723266601562, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.6555847185768444, "grad_norm": 0.3546317517757416, "learning_rate": 1e-06, "loss": -0.0546, "num_tokens": 110702902.0, "reward": 0.5502232313156128, "reward_std": 0.18888963758945465, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 709 }, { "clip_ratio/high_max": 0.0032468070203321986, "clip_ratio/high_mean": 0.0012757872100337408, "clip_ratio/low_mean": 0.0007859139659558423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020617012269212864, "epoch": 1.657917760279965, "grad_norm": 0.4355243444442749, "learning_rate": 1e-06, "loss": -0.0548, "step": 710 }, { "clip_ratio/high_max": 0.0034648421133169904, "clip_ratio/high_mean": 0.001307296257436974, "clip_ratio/low_mean": 0.0008862760132615222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021935723270871677, "epoch": 1.6602508019830855, "grad_norm": 0.2820480465888977, "learning_rate": 1e-06, "loss": -0.055, "step": 711 }, { "clip_ratio/high_max": 0.0034890639362856746, "clip_ratio/high_mean": 0.0013540016880142502, "clip_ratio/low_mean": 0.0011455987678345991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002499600472219754, "epoch": 1.662583843686206, "grad_norm": 0.5151032209396362, "learning_rate": 1e-06, "loss": -0.0551, "step": 712 }, { "clip_ratio/high_max": 0.0026658865390345454, "clip_ratio/high_mean": 0.000982806279353099, "clip_ratio/low_mean": 0.0005997703538014321, "clip_ratio/low_min": 7.753380486974493e-06, "clip_ratio/region_mean": 0.001582576624059584, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 1120.8739013671875, "completions/mean_terminated_length": 647.4708862304688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.6649168853893264, "grad_norm": 1.5790756940841675, "learning_rate": 1e-06, "loss": -0.0475, "num_tokens": 111285581.0, "reward": 0.5636160969734192, "reward_std": 0.18058130145072937, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 713 }, { "clip_ratio/high_max": 0.002504130097804591, "clip_ratio/high_mean": 0.0010163060505874455, "clip_ratio/low_mean": 0.0007436392897943733, "clip_ratio/low_min": 1.5506760973948985e-05, "clip_ratio/region_mean": 0.0017599453422008082, "epoch": 1.6672499270924468, "grad_norm": 0.27533194422721863, "learning_rate": 1e-06, "loss": -0.0477, "step": 714 }, { "clip_ratio/high_max": 0.002612814416352194, "clip_ratio/high_mean": 0.0010749164102890063, "clip_ratio/low_mean": 0.0009455912722842186, "clip_ratio/low_min": 1.5506760973948985e-05, "clip_ratio/region_mean": 0.002020507716224529, "epoch": 1.6695829687955672, "grad_norm": 0.3125113248825073, "learning_rate": 1e-06, "loss": -0.048, "step": 715 }, { "clip_ratio/high_max": 0.0025677713856566697, "clip_ratio/high_mean": 0.0011013084003934637, "clip_ratio/low_mean": 0.0011349204478392494, "clip_ratio/low_min": 1.1034604540327564e-05, "clip_ratio/region_mean": 0.0022362288800650276, "epoch": 1.6719160104986877, "grad_norm": 0.23288454115390778, "learning_rate": 1e-06, "loss": -0.0481, "step": 716 }, { "clip_ratio/high_max": 0.002815164923958946, "clip_ratio/high_mean": 0.001207950614116271, "clip_ratio/low_mean": 0.0007721511083218502, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019801017551799305, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 1129.286865234375, "completions/mean_terminated_length": 621.261474609375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.674249052201808, "grad_norm": 0.3266702890396118, "learning_rate": 1e-06, "loss": -0.0782, "num_tokens": 111847750.0, "reward": 0.5892857313156128, "reward_std": 0.18765109777450562, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 717 }, { "clip_ratio/high_max": 0.0031648338117520325, "clip_ratio/high_mean": 0.0013632078771479428, "clip_ratio/low_mean": 0.0008790632273303345, "clip_ratio/low_min": 2.461114308971446e-05, "clip_ratio/region_mean": 0.0022422711408580653, "epoch": 1.6765820939049285, "grad_norm": 0.2589649260044098, "learning_rate": 1e-06, "loss": -0.0784, "step": 718 }, { "clip_ratio/high_max": 0.0032027985143940896, "clip_ratio/high_mean": 0.0013695933666895144, "clip_ratio/low_mean": 0.001074670915841125, "clip_ratio/low_min": 2.461114308971446e-05, "clip_ratio/region_mean": 0.002444264253426809, "epoch": 1.678915135608049, "grad_norm": 0.2547096908092499, "learning_rate": 1e-06, "loss": -0.0786, "step": 719 }, { "clip_ratio/high_max": 0.0031522763238172047, "clip_ratio/high_mean": 0.0013668813480762765, "clip_ratio/low_mean": 0.0012862763906014152, "clip_ratio/low_min": 1.230557154485723e-05, "clip_ratio/region_mean": 0.0026531577605055645, "epoch": 1.6812481773111694, "grad_norm": 0.22740402817726135, "learning_rate": 1e-06, "loss": -0.0787, "step": 720 }, { "clip_ratio/high_max": 0.002459034643834457, "clip_ratio/high_mean": 0.0009829897135205101, "clip_ratio/low_mean": 0.0007839340250939131, "clip_ratio/low_min": 5.93514705542475e-05, "clip_ratio/region_mean": 0.0017669237204245292, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 1150.0201416015625, "completions/mean_terminated_length": 631.9606323242188, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 1.68358121901429, "grad_norm": 0.5587748289108276, "learning_rate": 1e-06, "loss": -0.0828, "num_tokens": 112412152.0, "reward": 0.6026785969734192, "reward_std": 0.191331684589386, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 721 }, { "clip_ratio/high_max": 0.0029961943146190606, "clip_ratio/high_mean": 0.0011621251032920554, "clip_ratio/low_mean": 0.0010288262703852524, "clip_ratio/low_min": 1.5303623513318598e-05, "clip_ratio/region_mean": 0.0021909513452555984, "epoch": 1.6859142607174102, "grad_norm": 0.2969554364681244, "learning_rate": 1e-06, "loss": -0.0832, "step": 722 }, { "clip_ratio/high_max": 0.0031718460959382355, "clip_ratio/high_mean": 0.00123020840328536, "clip_ratio/low_mean": 0.0012507029889547994, "clip_ratio/low_min": 4.064449603902176e-05, "clip_ratio/region_mean": 0.0024809113892843015, "epoch": 1.688247302420531, "grad_norm": 0.2799740433692932, "learning_rate": 1e-06, "loss": -0.0834, "step": 723 }, { "clip_ratio/high_max": 0.0026023879981948994, "clip_ratio/high_mean": 0.001188766666018637, "clip_ratio/low_mean": 0.001419997277935181, "clip_ratio/low_min": 8.171108493115753e-05, "clip_ratio/region_mean": 0.0026087639562319964, "epoch": 1.690580344123651, "grad_norm": 0.4607809782028198, "learning_rate": 1e-06, "loss": -0.0834, "step": 724 }, { "clip_ratio/high_max": 0.0023307888841372915, "clip_ratio/high_mean": 0.0008368470626010094, "clip_ratio/low_mean": 0.0006770364561816677, "clip_ratio/low_min": 1.246758438355755e-05, "clip_ratio/region_mean": 0.0015138835115067195, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3751.0, "completions/mean_length": 1298.9453125, "completions/mean_terminated_length": 745.5173950195312, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 1.6929133858267718, "grad_norm": 0.461676687002182, "learning_rate": 1e-06, "loss": -0.0593, "num_tokens": 113074303.0, "reward": 0.4765625298023224, "reward_std": 0.1631140559911728, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 725 }, { "clip_ratio/high_max": 0.0025204136691172607, "clip_ratio/high_mean": 0.0009060058400791604, "clip_ratio/low_mean": 0.0008604796603322029, "clip_ratio/low_min": 2.49351687671151e-05, "clip_ratio/region_mean": 0.0017664854640315752, "epoch": 1.695246427529892, "grad_norm": 0.25128376483917236, "learning_rate": 1e-06, "loss": -0.0595, "step": 726 }, { "clip_ratio/high_max": 0.0029437264820444398, "clip_ratio/high_mean": 0.0010224938123428728, "clip_ratio/low_mean": 0.0011038107513741124, "clip_ratio/low_min": 5.0689373892964795e-05, "clip_ratio/region_mean": 0.0021263045928208157, "epoch": 1.6975794692330126, "grad_norm": 0.2460564225912094, "learning_rate": 1e-06, "loss": -0.0597, "step": 727 }, { "clip_ratio/high_max": 0.00299599785648752, "clip_ratio/high_mean": 0.0010116043558809906, "clip_ratio/low_mean": 0.0012315997555560898, "clip_ratio/low_min": 3.740275133168325e-05, "clip_ratio/region_mean": 0.0022432041296269745, "epoch": 1.6999125109361328, "grad_norm": 0.2009311467409134, "learning_rate": 1e-06, "loss": -0.0598, "step": 728 }, { "clip_ratio/high_max": 0.0032209225682890974, "clip_ratio/high_mean": 0.0012293942127143964, "clip_ratio/low_mean": 0.0007464203936251579, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001975814600882586, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 935.489990234375, "completions/mean_terminated_length": 582.5794067382812, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.7022455526392535, "grad_norm": 4.980407238006592, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 113639638.0, "reward": 0.6272321939468384, "reward_std": 0.15898387134075165, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 729 }, { "clip_ratio/high_max": 0.0027195838920306414, "clip_ratio/high_mean": 0.0010664332712622127, "clip_ratio/low_mean": 0.0008129657744575525, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018793990529957227, "epoch": 1.704578594342374, "grad_norm": 0.5269160866737366, "learning_rate": 1e-06, "loss": -0.0385, "step": 730 }, { "clip_ratio/high_max": 0.0033131463642348535, "clip_ratio/high_mean": 0.0012982577991351718, "clip_ratio/low_mean": 0.000979185013420647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00227744277799502, "epoch": 1.7069116360454943, "grad_norm": 0.255098432302475, "learning_rate": 1e-06, "loss": -0.0388, "step": 731 }, { "clip_ratio/high_max": 0.003772572054003831, "clip_ratio/high_mean": 0.0013682142562174704, "clip_ratio/low_mean": 0.0012063533049513353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002574567508418113, "epoch": 1.7092446777486148, "grad_norm": 0.2409784346818924, "learning_rate": 1e-06, "loss": -0.039, "step": 732 }, { "clip_ratio/high_max": 0.002477140362316277, "clip_ratio/high_mean": 0.0010114155047631357, "clip_ratio/low_mean": 0.0007586789797642268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017700944517855532, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3569.0, "completions/mean_length": 1213.34375, "completions/mean_terminated_length": 684.03173828125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.7115777194517352, "grad_norm": 0.31858381628990173, "learning_rate": 1e-06, "loss": -0.0583, "num_tokens": 114250298.0, "reward": 0.5022321939468384, "reward_std": 0.18077491223812103, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 733 }, { "clip_ratio/high_max": 0.003232018687413074, "clip_ratio/high_mean": 0.0012371951961540617, "clip_ratio/low_mean": 0.0008931870615924709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00213038226502249, "epoch": 1.7139107611548556, "grad_norm": 0.27929049730300903, "learning_rate": 1e-06, "loss": -0.0585, "step": 734 }, { "clip_ratio/high_max": 0.0030870433038217016, "clip_ratio/high_mean": 0.001217700159031665, "clip_ratio/low_mean": 0.0010097362828673795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002227436467364896, "epoch": 1.716243802857976, "grad_norm": 0.6106163263320923, "learning_rate": 1e-06, "loss": -0.0586, "step": 735 }, { "clip_ratio/high_max": 0.0035824427686748095, "clip_ratio/high_mean": 0.0013400594325503334, "clip_ratio/low_mean": 0.0011223824658372905, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024624418656458147, "epoch": 1.7185768445610965, "grad_norm": 0.29838284850120544, "learning_rate": 1e-06, "loss": -0.0588, "step": 736 }, { "clip_ratio/high_max": 0.002664028783328831, "clip_ratio/high_mean": 0.000893843942321837, "clip_ratio/low_mean": 0.0006474540932686068, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015412980574183166, "completions/clipped_ratio": 0.1529017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 1172.888427734375, "completions/mean_terminated_length": 645.2648315429688, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 1.720909886264217, "grad_norm": 0.2895461916923523, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 114825838.0, "reward": 0.5189732313156128, "reward_std": 0.18460315465927124, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 737 }, { "clip_ratio/high_max": 0.002998348449182231, "clip_ratio/high_mean": 0.0011709841783158481, "clip_ratio/low_mean": 0.0008560227888665395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002027006987191271, "epoch": 1.7232429279673376, "grad_norm": 0.2838369905948639, "learning_rate": 1e-06, "loss": -0.064, "step": 738 }, { "clip_ratio/high_max": 0.003120548695733305, "clip_ratio/high_mean": 0.0010799289084388874, "clip_ratio/low_mean": 0.0009921840646711644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020721129767480306, "epoch": 1.7255759696704578, "grad_norm": 0.22486697137355804, "learning_rate": 1e-06, "loss": -0.0641, "step": 739 }, { "clip_ratio/high_max": 0.003253393340855837, "clip_ratio/high_mean": 0.0011882542421517428, "clip_ratio/low_mean": 0.0011494070386106614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002337661288038362, "epoch": 1.7279090113735784, "grad_norm": 0.21993814408779144, "learning_rate": 1e-06, "loss": -0.0642, "step": 740 }, { "clip_ratio/high_max": 0.0021724328616983257, "clip_ratio/high_mean": 0.000998477484245086, "clip_ratio/low_mean": 0.0005606676586467074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00155914512288291, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 1220.5023193359375, "completions/mean_terminated_length": 678.9628295898438, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 1.7302420530766986, "grad_norm": 15.584522247314453, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 115429624.0, "reward": 0.4843750298023224, "reward_std": 0.19178421795368195, "rewards/verify_math_reward/mean": 0.484375, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 741 }, { "clip_ratio/high_max": 0.0025229384045815095, "clip_ratio/high_mean": 0.0011178900549566606, "clip_ratio/low_mean": 0.0006621437532885466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017800338100641966, "epoch": 1.7325750947798193, "grad_norm": 0.31810426712036133, "learning_rate": 1e-06, "loss": -0.0585, "step": 742 }, { "clip_ratio/high_max": 0.002786370248941239, "clip_ratio/high_mean": 0.0011399284230719786, "clip_ratio/low_mean": 0.0009124590033025015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020523874336504377, "epoch": 1.7349081364829395, "grad_norm": 0.26250070333480835, "learning_rate": 1e-06, "loss": -0.0587, "step": 743 }, { "clip_ratio/high_max": 0.00331538483442273, "clip_ratio/high_mean": 0.001331851784925675, "clip_ratio/low_mean": 0.0010769663022074383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024088180798571557, "epoch": 1.7372411781860602, "grad_norm": 0.3217748701572418, "learning_rate": 1e-06, "loss": -0.0589, "step": 744 }, { "clip_ratio/high_max": 0.0026755058133858256, "clip_ratio/high_mean": 0.0010677093159756623, "clip_ratio/low_mean": 0.0005155050075700274, "clip_ratio/low_min": 3.306003782199696e-05, "clip_ratio/region_mean": 0.0015832143326406367, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3350.0, "completions/mean_length": 1036.04248046875, "completions/mean_terminated_length": 621.067138671875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 1.7395742198891804, "grad_norm": 0.28304192423820496, "learning_rate": 1e-06, "loss": -0.0671, "num_tokens": 116007390.0, "reward": 0.590401828289032, "reward_std": 0.1833678036928177, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 745 }, { "clip_ratio/high_max": 0.003182979482517112, "clip_ratio/high_mean": 0.0013363692451093812, "clip_ratio/low_mean": 0.0006916183174325852, "clip_ratio/low_min": 3.306003782199696e-05, "clip_ratio/region_mean": 0.0020279875679989345, "epoch": 1.741907261592301, "grad_norm": 0.27757349610328674, "learning_rate": 1e-06, "loss": -0.0673, "step": 746 }, { "clip_ratio/high_max": 0.0030266371395555325, "clip_ratio/high_mean": 0.0012793679743481334, "clip_ratio/low_mean": 0.0008264389962278074, "clip_ratio/low_min": 2.2045855075703003e-05, "clip_ratio/region_mean": 0.0021058070196886547, "epoch": 1.7442403032954215, "grad_norm": 0.32926011085510254, "learning_rate": 1e-06, "loss": -0.0675, "step": 747 }, { "clip_ratio/high_max": 0.0031405624249600805, "clip_ratio/high_mean": 0.0013172100225347094, "clip_ratio/low_mean": 0.0010678889229893684, "clip_ratio/low_min": 4.4091710151406005e-05, "clip_ratio/region_mean": 0.002385098960075993, "epoch": 1.7465733449985419, "grad_norm": 0.22287850081920624, "learning_rate": 1e-06, "loss": -0.0676, "step": 748 }, { "clip_ratio/high_max": 0.00284657390875509, "clip_ratio/high_mean": 0.0011250452407693956, "clip_ratio/low_mean": 0.0005324845951690804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016575298213865608, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 1238.094970703125, "completions/mean_terminated_length": 690.83642578125, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 1.7489063867016623, "grad_norm": 1.9422023296356201, "learning_rate": 1e-06, "loss": -0.0883, "num_tokens": 116617915.0, "reward": 0.5636160969734192, "reward_std": 0.19632963836193085, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 749 }, { "clip_ratio/high_max": 0.003081685288634617, "clip_ratio/high_mean": 0.001197873065393651, "clip_ratio/low_mean": 0.0006674429932900239, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018653160441317596, "epoch": 1.7512394284047827, "grad_norm": 0.4773826003074646, "learning_rate": 1e-06, "loss": -0.0889, "step": 750 }, { "clip_ratio/high_max": 0.0031742432111059316, "clip_ratio/high_mean": 0.001281333199585788, "clip_ratio/low_mean": 0.0008765245638642227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002157857801648788, "epoch": 1.7535724701079032, "grad_norm": 0.33436018228530884, "learning_rate": 1e-06, "loss": -0.089, "step": 751 }, { "clip_ratio/high_max": 0.0033928682314581238, "clip_ratio/high_mean": 0.0013453663850668818, "clip_ratio/low_mean": 0.0011055918002966791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002450958236295264, "epoch": 1.7559055118110236, "grad_norm": 0.25220417976379395, "learning_rate": 1e-06, "loss": -0.0892, "step": 752 }, { "clip_ratio/high_max": 0.0028613456306629814, "clip_ratio/high_mean": 0.0011327316424285527, "clip_ratio/low_mean": 0.0008035421469685389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019362737948540598, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3671.0, "completions/mean_length": 1182.8795166015625, "completions/mean_terminated_length": 675.0878295898438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.758238553514144, "grad_norm": 314.83416748046875, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 117229695.0, "reward": 0.5479910969734192, "reward_std": 0.20061388611793518, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 753 }, { "clip_ratio/high_max": 0.0027540715091163293, "clip_ratio/high_mean": 0.001109945667849388, "clip_ratio/low_mean": 0.0007770186812194879, "clip_ratio/low_min": 3.0285977118182927e-05, "clip_ratio/region_mean": 0.0018869643317884766, "epoch": 1.7605715952172645, "grad_norm": 0.3226010203361511, "learning_rate": 1e-06, "loss": -0.0654, "step": 754 }, { "clip_ratio/high_max": 0.003243813320295885, "clip_ratio/high_mean": 0.0013276745921757538, "clip_ratio/low_mean": 0.00103389822106692, "clip_ratio/low_min": 1.14009490062017e-05, "clip_ratio/region_mean": 0.002361572755035013, "epoch": 1.762904636920385, "grad_norm": 238.32568359375, "learning_rate": 1e-06, "loss": -0.0282, "step": 755 }, { "clip_ratio/high_max": 0.0033799676239141263, "clip_ratio/high_mean": 0.0012686898298852611, "clip_ratio/low_mean": 0.0009171866286123986, "clip_ratio/low_min": 3.549853136064485e-05, "clip_ratio/region_mean": 0.0021858764594071545, "epoch": 1.7652376786235053, "grad_norm": 0.6687106490135193, "learning_rate": 1e-06, "loss": -0.0656, "step": 756 }, { "clip_ratio/high_max": 0.0024139823726727627, "clip_ratio/high_mean": 0.0010356007805967238, "clip_ratio/low_mean": 0.0006558243112522177, "clip_ratio/low_min": 3.3641588743194006e-05, "clip_ratio/region_mean": 0.0016914250954869203, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 1133.8739013671875, "completions/mean_terminated_length": 710.7130126953125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.767570720326626, "grad_norm": 0.3212842047214508, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 117871678.0, "reward": 0.5691964626312256, "reward_std": 0.2011028379201889, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 757 }, { "clip_ratio/high_max": 0.00265854928875342, "clip_ratio/high_mean": 0.0011482163208711427, "clip_ratio/low_mean": 0.0009092958716792054, "clip_ratio/low_min": 5.7681192629388534e-05, "clip_ratio/region_mean": 0.0020575121670844965, "epoch": 1.7699037620297462, "grad_norm": 0.4126548171043396, "learning_rate": 1e-06, "loss": -0.0398, "step": 758 }, { "clip_ratio/high_max": 0.0027757582283811644, "clip_ratio/high_mean": 0.0012343671405687928, "clip_ratio/low_mean": 0.0010273049119859934, "clip_ratio/low_min": 6.639694674959173e-05, "clip_ratio/region_mean": 0.0022616721253143623, "epoch": 1.7722368037328668, "grad_norm": 0.4178924858570099, "learning_rate": 1e-06, "loss": -0.04, "step": 759 }, { "clip_ratio/high_max": 0.002975584313389845, "clip_ratio/high_mean": 0.0011832737181975972, "clip_ratio/low_mean": 0.0012183574799564667, "clip_ratio/low_min": 6.548060991917737e-05, "clip_ratio/region_mean": 0.002401631194516085, "epoch": 1.774569845435987, "grad_norm": 0.4640863835811615, "learning_rate": 1e-06, "loss": -0.0401, "step": 760 }, { "clip_ratio/high_max": 0.0027177054216735996, "clip_ratio/high_mean": 0.0010661677770258393, "clip_ratio/low_mean": 0.0008247852142631018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018909530226665083, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 1084.7801513671875, "completions/mean_terminated_length": 623.602294921875, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 1.7769028871391077, "grad_norm": 1.8677130937576294, "learning_rate": 1e-06, "loss": -0.073, "num_tokens": 118446569.0, "reward": 0.5848214626312256, "reward_std": 0.18629561364650726, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 761 }, { "clip_ratio/high_max": 0.0027192810812266544, "clip_ratio/high_mean": 0.0011223935507587157, "clip_ratio/low_mean": 0.0008811653997327085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002003558969590813, "epoch": 1.779235928842228, "grad_norm": 0.276852011680603, "learning_rate": 1e-06, "loss": -0.0745, "step": 762 }, { "clip_ratio/high_max": 0.002809308636642527, "clip_ratio/high_mean": 0.0011326930944051128, "clip_ratio/low_mean": 0.0011326893554723938, "clip_ratio/low_min": 1.7212889360962436e-05, "clip_ratio/region_mean": 0.0022653824562439695, "epoch": 1.7815689705453486, "grad_norm": 0.3082873225212097, "learning_rate": 1e-06, "loss": -0.0747, "step": 763 }, { "clip_ratio/high_max": 0.0029465278930729255, "clip_ratio/high_mean": 0.0012359132633719128, "clip_ratio/low_mean": 0.0012552761309052585, "clip_ratio/low_min": 1.7212889360962436e-05, "clip_ratio/region_mean": 0.0024911894215620123, "epoch": 1.7839020122484688, "grad_norm": 0.273729532957077, "learning_rate": 1e-06, "loss": -0.0748, "step": 764 }, { "clip_ratio/high_max": 0.002847759962605778, "clip_ratio/high_mean": 0.0011323506605549483, "clip_ratio/low_mean": 0.0007618699310114607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001894220600661356, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 1210.5234375, "completions/mean_terminated_length": 630.3338012695312, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 1.7862350539515894, "grad_norm": 3.351031541824341, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 119000446.0, "reward": 0.543526828289032, "reward_std": 0.1925010085105896, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 765 }, { "clip_ratio/high_max": 0.00323330718674697, "clip_ratio/high_mean": 0.001256571958947461, "clip_ratio/low_mean": 0.000871165502758231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002127737505361438, "epoch": 1.7885680956547099, "grad_norm": 0.3607265055179596, "learning_rate": 1e-06, "loss": -0.0643, "step": 766 }, { "clip_ratio/high_max": 0.003260108904214576, "clip_ratio/high_mean": 0.001346371071122121, "clip_ratio/low_mean": 0.0010009528577938909, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023473239489248954, "epoch": 1.7909011373578303, "grad_norm": 2.097149133682251, "learning_rate": 1e-06, "loss": -0.0644, "step": 767 }, { "clip_ratio/high_max": 0.0032591589260846376, "clip_ratio/high_mean": 0.0012323111895966576, "clip_ratio/low_mean": 0.0012420912171364762, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002474402412190102, "epoch": 1.7932341790609507, "grad_norm": 0.3503873646259308, "learning_rate": 1e-06, "loss": -0.0647, "step": 768 }, { "clip_ratio/high_max": 0.0026258206671627704, "clip_ratio/high_mean": 0.000990385753539158, "clip_ratio/low_mean": 0.0005832842853124021, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015736700370325707, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3763.0, "completions/mean_length": 1336.5770263671875, "completions/mean_terminated_length": 713.7250366210938, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 1.7955672207640712, "grad_norm": 0.3459215462207794, "learning_rate": 1e-06, "loss": -0.0814, "num_tokens": 119614675.0, "reward": 0.4899553656578064, "reward_std": 0.16915760934352875, "rewards/verify_math_reward/mean": 0.4899553656578064, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 769 }, { "clip_ratio/high_max": 0.003370824648300186, "clip_ratio/high_mean": 0.00108847231240361, "clip_ratio/low_mean": 0.0008698051697137998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019582774766604416, "epoch": 1.7979002624671916, "grad_norm": 0.36510348320007324, "learning_rate": 1e-06, "loss": -0.0816, "step": 770 }, { "clip_ratio/high_max": 0.003368631034391001, "clip_ratio/high_mean": 0.00110209556260088, "clip_ratio/low_mean": 0.0010230665575363673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021251621292321943, "epoch": 1.800233304170312, "grad_norm": 0.2574589252471924, "learning_rate": 1e-06, "loss": -0.0818, "step": 771 }, { "clip_ratio/high_max": 0.0032451283805130515, "clip_ratio/high_mean": 0.0010494873076822842, "clip_ratio/low_mean": 0.0011778191728808451, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002227306496934034, "epoch": 1.8025663458734325, "grad_norm": 0.30454590916633606, "learning_rate": 1e-06, "loss": -0.0818, "step": 772 }, { "clip_ratio/high_max": 0.0029339980173972435, "clip_ratio/high_mean": 0.001096039170079166, "clip_ratio/low_mean": 0.000837438672533608, "clip_ratio/low_min": 2.071594281005673e-05, "clip_ratio/region_mean": 0.0019334778990014456, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3170.0, "completions/mean_length": 1040.3717041015625, "completions/mean_terminated_length": 608.3019409179688, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 1.8048993875765529, "grad_norm": 0.7104086875915527, "learning_rate": 1e-06, "loss": -0.065, "num_tokens": 120177648.0, "reward": 0.5580357313156128, "reward_std": 0.18727272748947144, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 773 }, { "clip_ratio/high_max": 0.0030959622308728285, "clip_ratio/high_mean": 0.001223422081238823, "clip_ratio/low_mean": 0.0009762022382346913, "clip_ratio/low_min": 1.2765523024427239e-05, "clip_ratio/region_mean": 0.002199624286731705, "epoch": 1.8072324292796735, "grad_norm": 0.287118524312973, "learning_rate": 1e-06, "loss": -0.0654, "step": 774 }, { "clip_ratio/high_max": 0.003126847732346505, "clip_ratio/high_mean": 0.0011743563081836328, "clip_ratio/low_mean": 0.0012620371744560543, "clip_ratio/low_min": 1.188212900160579e-05, "clip_ratio/region_mean": 0.002436393508105539, "epoch": 1.8095654709827937, "grad_norm": 0.28494638204574585, "learning_rate": 1e-06, "loss": -0.0656, "step": 775 }, { "clip_ratio/high_max": 0.0030919609125703573, "clip_ratio/high_mean": 0.0012231979781063274, "clip_ratio/low_mean": 0.001315563911703066, "clip_ratio/low_min": 4.307374183554202e-05, "clip_ratio/region_mean": 0.0025387618952663615, "epoch": 1.8118985126859144, "grad_norm": 0.3085228204727173, "learning_rate": 1e-06, "loss": -0.0657, "step": 776 }, { "clip_ratio/high_max": 0.0029989995091455057, "clip_ratio/high_mean": 0.0012472212983993813, "clip_ratio/low_mean": 0.0008154747411026619, "clip_ratio/low_min": 1.852400782809127e-05, "clip_ratio/region_mean": 0.0020626960395020433, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 1074.548095703125, "completions/mean_terminated_length": 611.8031005859375, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 1.8142315543890346, "grad_norm": 33.373863220214844, "learning_rate": 1e-06, "loss": -0.0737, "num_tokens": 120737403.0, "reward": 0.5613839626312256, "reward_std": 0.1915150135755539, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 777 }, { "clip_ratio/high_max": 0.004196706533548422, "clip_ratio/high_mean": 0.0015237740080920048, "clip_ratio/low_mean": 0.0008571688886149786, "clip_ratio/low_min": 1.7448352082283236e-05, "clip_ratio/region_mean": 0.0023809428603271954, "epoch": 1.8165645960921553, "grad_norm": 0.44422808289527893, "learning_rate": 1e-06, "loss": -0.0751, "step": 778 }, { "clip_ratio/high_max": 0.0038123916383483447, "clip_ratio/high_mean": 0.00148162886944192, "clip_ratio/low_mean": 0.001125619901358732, "clip_ratio/low_min": 1.852400782809127e-05, "clip_ratio/region_mean": 0.002607248825370334, "epoch": 1.8188976377952755, "grad_norm": 0.4247210919857025, "learning_rate": 1e-06, "loss": -0.0752, "step": 779 }, { "clip_ratio/high_max": 0.004324031615396962, "clip_ratio/high_mean": 0.0016493947514391039, "clip_ratio/low_mean": 0.0014935293256712612, "clip_ratio/low_min": 3.704801565618254e-05, "clip_ratio/region_mean": 0.0031429240843863226, "epoch": 1.8212306794983961, "grad_norm": 0.2895664870738983, "learning_rate": 1e-06, "loss": -0.0756, "step": 780 }, { "clip_ratio/high_max": 0.002218426438048482, "clip_ratio/high_mean": 0.000985602178843692, "clip_ratio/low_mean": 0.00051336269643798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014989648880145978, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 1205.4442138671875, "completions/mean_terminated_length": 656.50732421875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 1.8235637212015163, "grad_norm": 0.6474166512489319, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 121314625.0, "reward": 0.59375, "reward_std": 0.17322726547718048, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 781 }, { "clip_ratio/high_max": 0.003220126745873131, "clip_ratio/high_mean": 0.0012350822689768393, "clip_ratio/low_mean": 0.0006166999246488558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001851782166340854, "epoch": 1.825896762904637, "grad_norm": 0.26349204778671265, "learning_rate": 1e-06, "loss": -0.0749, "step": 782 }, { "clip_ratio/high_max": 0.0028772126461262815, "clip_ratio/high_mean": 0.0010976546873280313, "clip_ratio/low_mean": 0.0008049707248574123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019026254085474648, "epoch": 1.8282298046077574, "grad_norm": 0.8894323706626892, "learning_rate": 1e-06, "loss": -0.075, "step": 783 }, { "clip_ratio/high_max": 0.0026555404547252692, "clip_ratio/high_mean": 0.0010447209933772683, "clip_ratio/low_mean": 0.0008835730532155139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019282940484117717, "epoch": 1.8305628463108778, "grad_norm": 0.24377326667308807, "learning_rate": 1e-06, "loss": -0.0751, "step": 784 }, { "clip_ratio/high_max": 0.0024160910106729716, "clip_ratio/high_mean": 0.0010153386174351908, "clip_ratio/low_mean": 0.0009500876221864019, "clip_ratio/low_min": 5.838268043589778e-05, "clip_ratio/region_mean": 0.0019654262214316987, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3156.0, "completions/mean_length": 1289.6942138671875, "completions/mean_terminated_length": 720.8993530273438, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 1.8328958880139983, "grad_norm": 0.41125309467315674, "learning_rate": 1e-06, "loss": -0.0585, "num_tokens": 121952503.0, "reward": 0.4821428656578064, "reward_std": 0.20970793068408966, "rewards/verify_math_reward/mean": 0.4821428656578064, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 785 }, { "clip_ratio/high_max": 0.002769492071820423, "clip_ratio/high_mean": 0.001225576525030192, "clip_ratio/low_mean": 0.001038886030073627, "clip_ratio/low_min": 3.257753633079119e-05, "clip_ratio/region_mean": 0.0022644625714747235, "epoch": 1.8352289297171187, "grad_norm": 0.3411327600479126, "learning_rate": 1e-06, "loss": -0.0587, "step": 786 }, { "clip_ratio/high_max": 0.0027927573755732737, "clip_ratio/high_mean": 0.0012394625846354757, "clip_ratio/low_mean": 0.0013133646643836983, "clip_ratio/low_min": 6.689386646030471e-05, "clip_ratio/region_mean": 0.0025528272235533223, "epoch": 1.8375619714202391, "grad_norm": 0.2850266098976135, "learning_rate": 1e-06, "loss": -0.0589, "step": 787 }, { "clip_ratio/high_max": 0.0027165829815203324, "clip_ratio/high_mean": 0.001161423610028578, "clip_ratio/low_mean": 0.00147952593761147, "clip_ratio/low_min": 8.636773054604419e-05, "clip_ratio/region_mean": 0.0026409495039843023, "epoch": 1.8398950131233596, "grad_norm": 0.26047283411026, "learning_rate": 1e-06, "loss": -0.059, "step": 788 }, { "clip_ratio/high_max": 0.002578800485935062, "clip_ratio/high_mean": 0.001047983752869186, "clip_ratio/low_mean": 0.0008012510315893451, "clip_ratio/low_min": 4.130394972889917e-05, "clip_ratio/region_mean": 0.0018492348463041708, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3339.0, "completions/mean_length": 1123.114990234375, "completions/mean_terminated_length": 636.642822265625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.84222805482648, "grad_norm": 0.5912438631057739, "learning_rate": 1e-06, "loss": -0.0424, "num_tokens": 122538254.0, "reward": 0.4754464626312256, "reward_std": 0.1777697503566742, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756315231323, "step": 789 }, { "clip_ratio/high_max": 0.0028244317072676495, "clip_ratio/high_mean": 0.0011763499805965694, "clip_ratio/low_mean": 0.0009410630318598123, "clip_ratio/low_min": 9.711849997984245e-05, "clip_ratio/region_mean": 0.0021174130306462757, "epoch": 1.8445610965296004, "grad_norm": 0.3282478153705597, "learning_rate": 1e-06, "loss": -0.0426, "step": 790 }, { "clip_ratio/high_max": 0.002955569259938784, "clip_ratio/high_mean": 0.001260399745660834, "clip_ratio/low_mean": 0.001283634115679888, "clip_ratio/low_min": 7.326204649871215e-05, "clip_ratio/region_mean": 0.0025440337922191247, "epoch": 1.8468941382327209, "grad_norm": 0.25638914108276367, "learning_rate": 1e-06, "loss": -0.043, "step": 791 }, { "clip_ratio/high_max": 0.0030136297100398224, "clip_ratio/high_mean": 0.0011830145158455707, "clip_ratio/low_mean": 0.0014160231658024713, "clip_ratio/low_min": 6.97122286510421e-05, "clip_ratio/region_mean": 0.0025990377325797454, "epoch": 1.8492271799358413, "grad_norm": 0.23427437245845795, "learning_rate": 1e-06, "loss": -0.0431, "step": 792 }, { "clip_ratio/high_max": 0.0024229781483882107, "clip_ratio/high_mean": 0.0009719952849991387, "clip_ratio/low_mean": 0.0005247754725132836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001496770742960507, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 1155.212158203125, "completions/mean_terminated_length": 628.9658203125, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 1.851560221638962, "grad_norm": 0.3743833303451538, "learning_rate": 1e-06, "loss": -0.0537, "num_tokens": 123096836.0, "reward": 0.5167410969734192, "reward_std": 0.17803259193897247, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 793 }, { "clip_ratio/high_max": 0.0029818167313351296, "clip_ratio/high_mean": 0.0012583399347931845, "clip_ratio/low_mean": 0.0006896365030115703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019479764232528396, "epoch": 1.8538932633420822, "grad_norm": 0.34099406003952026, "learning_rate": 1e-06, "loss": -0.0539, "step": 794 }, { "clip_ratio/high_max": 0.003303279147075955, "clip_ratio/high_mean": 0.001245715146069415, "clip_ratio/low_mean": 0.0008224282119044801, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020681434107245877, "epoch": 1.8562263050452028, "grad_norm": 0.3153402805328369, "learning_rate": 1e-06, "loss": -0.0541, "step": 795 }, { "clip_ratio/high_max": 0.0028148664787295274, "clip_ratio/high_mean": 0.001179153157863766, "clip_ratio/low_mean": 0.0009924966507242061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021716497576562688, "epoch": 1.858559346748323, "grad_norm": 53.812217712402344, "learning_rate": 1e-06, "loss": -0.0418, "step": 796 }, { "clip_ratio/high_max": 0.002547744916228112, "clip_ratio/high_mean": 0.0010866733482544078, "clip_ratio/low_mean": 0.0005692618005923578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001655935153394239, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 1173.587158203125, "completions/mean_terminated_length": 655.1563720703125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.8608923884514437, "grad_norm": 0.4387311041355133, "learning_rate": 1e-06, "loss": -0.0739, "num_tokens": 123686802.0, "reward": 0.5703125, "reward_std": 0.19779638946056366, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 797 }, { "clip_ratio/high_max": 0.003036022237211, "clip_ratio/high_mean": 0.001310670777456835, "clip_ratio/low_mean": 0.0008477637438772945, "clip_ratio/low_min": 9.033096830535214e-06, "clip_ratio/region_mean": 0.0021584345522569492, "epoch": 1.8632254301545639, "grad_norm": 0.3525553345680237, "learning_rate": 1e-06, "loss": -0.0742, "step": 798 }, { "clip_ratio/high_max": 0.003579358344723005, "clip_ratio/high_mean": 0.0014152447729429696, "clip_ratio/low_mean": 0.000987058156169951, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002402302976406645, "epoch": 1.8655584718576845, "grad_norm": 0.28096428513526917, "learning_rate": 1e-06, "loss": -0.0744, "step": 799 }, { "clip_ratio/high_max": 0.003146655253658537, "clip_ratio/high_mean": 0.0012560870563902427, "clip_ratio/low_mean": 0.0011969195002166089, "clip_ratio/low_min": 2.0616857000277378e-05, "clip_ratio/region_mean": 0.0024530065566068515, "epoch": 1.8678915135608047, "grad_norm": 1.1679539680480957, "learning_rate": 1e-06, "loss": -0.0743, "step": 800 }, { "clip_ratio/high_max": 0.0026608863154251594, "clip_ratio/high_mean": 0.0011777408835769165, "clip_ratio/low_mean": 0.0007729054141236702, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001950646321347449, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 1082.5748291015625, "completions/mean_terminated_length": 621.0592041015625, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 1.8702245552639254, "grad_norm": 0.3649967312812805, "learning_rate": 1e-06, "loss": -0.1062, "num_tokens": 124249981.0, "reward": 0.5613839626312256, "reward_std": 0.19959835708141327, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 801 }, { "clip_ratio/high_max": 0.0034796422078215983, "clip_ratio/high_mean": 0.0013255642352305586, "clip_ratio/low_mean": 0.0009117877307289746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00223735197505448, "epoch": 1.8725575969670458, "grad_norm": 0.3103090226650238, "learning_rate": 1e-06, "loss": -0.1064, "step": 802 }, { "clip_ratio/high_max": 0.003475991848972626, "clip_ratio/high_mean": 0.0014859671118756523, "clip_ratio/low_mean": 0.0010257373705826467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025117045152001083, "epoch": 1.8748906386701663, "grad_norm": 0.2992345094680786, "learning_rate": 1e-06, "loss": -0.1065, "step": 803 }, { "clip_ratio/high_max": 0.0031524375117442105, "clip_ratio/high_mean": 0.0012972979675396346, "clip_ratio/low_mean": 0.0012619780500244815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025592759848223068, "epoch": 1.8772236803732867, "grad_norm": 0.24515242874622345, "learning_rate": 1e-06, "loss": -0.1067, "step": 804 }, { "clip_ratio/high_max": 0.0022816854107077233, "clip_ratio/high_mean": 0.0009620811269996921, "clip_ratio/low_mean": 0.0006721982017552364, "clip_ratio/low_min": 3.460731477389345e-05, "clip_ratio/region_mean": 0.001634279324207455, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 1031.529052734375, "completions/mean_terminated_length": 706.1654663085938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 1.8795567220764071, "grad_norm": 0.3367403447628021, "learning_rate": 1e-06, "loss": -0.0359, "num_tokens": 124902055.0, "reward": 0.5446428656578064, "reward_std": 0.19230598211288452, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 805 }, { "clip_ratio/high_max": 0.0026676424604374915, "clip_ratio/high_mean": 0.0010372075776103884, "clip_ratio/low_mean": 0.0008286454340122873, "clip_ratio/low_min": 4.175946378381923e-05, "clip_ratio/region_mean": 0.0018658530025277287, "epoch": 1.8818897637795275, "grad_norm": 0.5238305330276489, "learning_rate": 1e-06, "loss": -0.036, "step": 806 }, { "clip_ratio/high_max": 0.002518799461540766, "clip_ratio/high_mean": 0.0010061258362838998, "clip_ratio/low_mean": 0.0010268090845784172, "clip_ratio/low_min": 6.63214668747969e-05, "clip_ratio/region_mean": 0.0020329349499661475, "epoch": 1.884222805482648, "grad_norm": 0.22991643846035004, "learning_rate": 1e-06, "loss": -0.0362, "step": 807 }, { "clip_ratio/high_max": 0.0027269739548501093, "clip_ratio/high_mean": 0.0010583375023998087, "clip_ratio/low_mean": 0.0011506538266985444, "clip_ratio/low_min": 0.00011588513007154688, "clip_ratio/region_mean": 0.002208991310908459, "epoch": 1.8865558471857684, "grad_norm": 0.2613964378833771, "learning_rate": 1e-06, "loss": -0.0363, "step": 808 }, { "clip_ratio/high_max": 0.0030836150763207115, "clip_ratio/high_mean": 0.001089305558707565, "clip_ratio/low_mean": 0.0009967237756427494, "clip_ratio/low_min": 3.895116242347285e-05, "clip_ratio/region_mean": 0.0020860293152509257, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3641.0, "completions/mean_length": 1208.86279296875, "completions/mean_terminated_length": 632.9812622070312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.8888888888888888, "grad_norm": 0.4362976849079132, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 125466684.0, "reward": 0.4776785969734192, "reward_std": 0.19189409911632538, "rewards/verify_math_reward/mean": 0.4776785671710968, "rewards/verify_math_reward/std": 0.4997805058956146, "step": 809 }, { "clip_ratio/high_max": 0.0037536383606493473, "clip_ratio/high_mean": 0.0013200278917793185, "clip_ratio/low_mean": 0.0012613770231837407, "clip_ratio/low_min": 2.510178183001699e-05, "clip_ratio/region_mean": 0.0025814048931351863, "epoch": 1.8912219305920095, "grad_norm": 0.4780324697494507, "learning_rate": 1e-06, "loss": -0.046, "step": 810 }, { "clip_ratio/high_max": 0.003822509286692366, "clip_ratio/high_mean": 0.0013784136172034778, "clip_ratio/low_mean": 0.0014653871667178464, "clip_ratio/low_min": 7.743854257569183e-05, "clip_ratio/region_mean": 0.002843800772097893, "epoch": 1.8935549722951297, "grad_norm": 0.4442329704761505, "learning_rate": 1e-06, "loss": -0.0462, "step": 811 }, { "clip_ratio/high_max": 0.003794476651819423, "clip_ratio/high_mean": 0.0013138854246790288, "clip_ratio/low_mean": 0.0015565693893222488, "clip_ratio/low_min": 5.844498627993744e-05, "clip_ratio/region_mean": 0.0028704548240057193, "epoch": 1.8958880139982504, "grad_norm": 0.26526522636413574, "learning_rate": 1e-06, "loss": -0.0465, "step": 812 }, { "clip_ratio/high_max": 0.0037446660571731627, "clip_ratio/high_mean": 0.0013700762665393995, "clip_ratio/low_mean": 0.000577064794015314, "clip_ratio/low_min": 1.0777720490295906e-05, "clip_ratio/region_mean": 0.0019471410851110704, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3147.0, "completions/mean_length": 1187.0335693359375, "completions/mean_terminated_length": 684.4371948242188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.8982210557013706, "grad_norm": 0.5536288619041443, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 126086546.0, "reward": 0.5401785969734192, "reward_std": 0.18881365656852722, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 813 }, { "clip_ratio/high_max": 0.004004251328296959, "clip_ratio/high_mean": 0.0014463079533015843, "clip_ratio/low_mean": 0.0008713582028576639, "clip_ratio/low_min": 2.5572831873432733e-05, "clip_ratio/region_mean": 0.002317666163435206, "epoch": 1.9005540974044912, "grad_norm": 1.7858140468597412, "learning_rate": 1e-06, "loss": -0.0363, "step": 814 }, { "clip_ratio/high_max": 0.004262054026185069, "clip_ratio/high_mean": 0.001454497305530822, "clip_ratio/low_mean": 0.0008935269215726294, "clip_ratio/low_min": 1.2786415936716367e-05, "clip_ratio/region_mean": 0.002348024216189515, "epoch": 1.9028871391076114, "grad_norm": 0.5988819003105164, "learning_rate": 1e-06, "loss": -0.036, "step": 815 }, { "clip_ratio/high_max": 0.004061044914124068, "clip_ratio/high_mean": 0.0014671737189928535, "clip_ratio/low_mean": 0.0010717650038714055, "clip_ratio/low_min": 2.5572831873432733e-05, "clip_ratio/region_mean": 0.0025389387301402166, "epoch": 1.905220180810732, "grad_norm": 0.28887414932250977, "learning_rate": 1e-06, "loss": -0.0366, "step": 816 }, { "clip_ratio/high_max": 0.0020236366981407627, "clip_ratio/high_mean": 0.000754494802094996, "clip_ratio/low_mean": 0.0006129549783508992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013674497604370117, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3216.0, "completions/mean_length": 1156.2734375, "completions/mean_terminated_length": 666.3190307617188, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.9075532225138523, "grad_norm": 0.25467541813850403, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 126684095.0, "reward": 0.4754464626312256, "reward_std": 0.14124701917171478, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756315231323, "step": 817 }, { "clip_ratio/high_max": 0.00225022717131651, "clip_ratio/high_mean": 0.0008885783008736325, "clip_ratio/low_mean": 0.0007396828214041307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016282611395581625, "epoch": 1.909886264216973, "grad_norm": 0.2736433744430542, "learning_rate": 1e-06, "loss": -0.0507, "step": 818 }, { "clip_ratio/high_max": 0.002449413535941858, "clip_ratio/high_mean": 0.000851423601488932, "clip_ratio/low_mean": 0.0008513540087733418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017027776193572208, "epoch": 1.9122193059200934, "grad_norm": 0.22202320396900177, "learning_rate": 1e-06, "loss": -0.0509, "step": 819 }, { "clip_ratio/high_max": 0.0025430396490264684, "clip_ratio/high_mean": 0.000918691994229448, "clip_ratio/low_mean": 0.0009849502184806624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019036421726923436, "epoch": 1.9145523476232138, "grad_norm": 0.19813315570354462, "learning_rate": 1e-06, "loss": -0.0509, "step": 820 }, { "clip_ratio/high_max": 0.0027628564421320334, "clip_ratio/high_mean": 0.0011145286698592827, "clip_ratio/low_mean": 0.0008798533854132984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019943820880143903, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 1152.6507568359375, "completions/mean_terminated_length": 710.58154296875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 1.9168853893263342, "grad_norm": 0.7257772088050842, "learning_rate": 1e-06, "loss": -0.063, "num_tokens": 127317262.0, "reward": 0.512276828289032, "reward_std": 0.21259252727031708, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 821 }, { "clip_ratio/high_max": 0.0032998526439769194, "clip_ratio/high_mean": 0.0013349998007470276, "clip_ratio/low_mean": 0.0010927089679171331, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024277087577502243, "epoch": 1.9192184310294547, "grad_norm": 2.3483970165252686, "learning_rate": 1e-06, "loss": -0.0629, "step": 822 }, { "clip_ratio/high_max": 0.0029819938354194164, "clip_ratio/high_mean": 0.0012345084032858722, "clip_ratio/low_mean": 0.0012205102648294996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024550186353735626, "epoch": 1.921551472732575, "grad_norm": 0.392425000667572, "learning_rate": 1e-06, "loss": -0.0634, "step": 823 }, { "clip_ratio/high_max": 0.0033773245886550285, "clip_ratio/high_mean": 0.0012698962418653537, "clip_ratio/low_mean": 0.0014133723780105356, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026832685980480164, "epoch": 1.9238845144356955, "grad_norm": 0.4597063958644867, "learning_rate": 1e-06, "loss": -0.0636, "step": 824 }, { "clip_ratio/high_max": 0.0018310553241462912, "clip_ratio/high_mean": 0.0006253767978705582, "clip_ratio/low_mean": 0.0005611752451386565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011865520209539682, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3624.0, "completions/mean_length": 1177.08154296875, "completions/mean_terminated_length": 663.7808227539062, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 1.926217556138816, "grad_norm": 0.2965056598186493, "learning_rate": 1e-06, "loss": -0.0386, "num_tokens": 127916511.0, "reward": 0.5502232313156128, "reward_std": 0.12918464839458466, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 825 }, { "clip_ratio/high_max": 0.0022495640587294474, "clip_ratio/high_mean": 0.0007280273312062491, "clip_ratio/low_mean": 0.0006317879751804867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013598152909253258, "epoch": 1.9285505978419364, "grad_norm": 0.2959650456905365, "learning_rate": 1e-06, "loss": -0.0388, "step": 826 }, { "clip_ratio/high_max": 0.002368602505157469, "clip_ratio/high_mean": 0.0007911892898846418, "clip_ratio/low_mean": 0.0007485928449568746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015397820898215286, "epoch": 1.9308836395450568, "grad_norm": 0.26381915807724, "learning_rate": 1e-06, "loss": -0.0389, "step": 827 }, { "clip_ratio/high_max": 0.002355575856199721, "clip_ratio/high_mean": 0.000759249429393094, "clip_ratio/low_mean": 0.0008702262921360671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001629475736990571, "epoch": 1.9332166812481772, "grad_norm": 0.2257702499628067, "learning_rate": 1e-06, "loss": -0.0389, "step": 828 }, { "clip_ratio/high_max": 0.002666196880454663, "clip_ratio/high_mean": 0.001190558625239646, "clip_ratio/low_mean": 0.0006595710410692845, "clip_ratio/low_min": 2.544788185332436e-05, "clip_ratio/region_mean": 0.001850129643571563, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3405.0, "completions/mean_length": 1111.872802734375, "completions/mean_terminated_length": 707.1812133789062, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.935549722951298, "grad_norm": 2.402773380279541, "learning_rate": 1e-06, "loss": -0.0681, "num_tokens": 128565469.0, "reward": 0.515625, "reward_std": 0.20482298731803894, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 829 }, { "clip_ratio/high_max": 0.002947571367258206, "clip_ratio/high_mean": 0.0013106669175613206, "clip_ratio/low_mean": 0.000856815811857814, "clip_ratio/low_min": 3.255208503105678e-05, "clip_ratio/region_mean": 0.0021674827657989226, "epoch": 1.937882764654418, "grad_norm": 0.3744939863681793, "learning_rate": 1e-06, "loss": -0.0683, "step": 830 }, { "clip_ratio/high_max": 0.003122714195342269, "clip_ratio/high_mean": 0.001462145271943882, "clip_ratio/low_mean": 0.0009538302620057948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024159755193977617, "epoch": 1.9402158063575388, "grad_norm": 1.1734836101531982, "learning_rate": 1e-06, "loss": -0.0684, "step": 831 }, { "clip_ratio/high_max": 0.0029801751079503447, "clip_ratio/high_mean": 0.0013755303480138537, "clip_ratio/low_mean": 0.0010851121296582278, "clip_ratio/low_min": 3.255208503105678e-05, "clip_ratio/region_mean": 0.002460642463120166, "epoch": 1.942548848060659, "grad_norm": 0.3453752398490906, "learning_rate": 1e-06, "loss": -0.0683, "step": 832 }, { "clip_ratio/high_max": 0.0026444850518601015, "clip_ratio/high_mean": 0.0011122945325041655, "clip_ratio/low_mean": 0.0006181504722917452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001730444942950271, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3287.0, "completions/mean_length": 919.4688110351562, "completions/mean_terminated_length": 582.2074584960938, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 1.9448818897637796, "grad_norm": 0.3468431830406189, "learning_rate": 1e-06, "loss": -0.0802, "num_tokens": 129122281.0, "reward": 0.6205357313156128, "reward_std": 0.18888893723487854, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 833 }, { "clip_ratio/high_max": 0.0030155741260387003, "clip_ratio/high_mean": 0.001269630141905509, "clip_ratio/low_mean": 0.0007621770364494296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020318071692599915, "epoch": 1.9472149314668998, "grad_norm": 0.325935959815979, "learning_rate": 1e-06, "loss": -0.0804, "step": 834 }, { "clip_ratio/high_max": 0.0034363660306553356, "clip_ratio/high_mean": 0.0014364147209562361, "clip_ratio/low_mean": 0.0009700831869849935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024064979443210177, "epoch": 1.9495479731700205, "grad_norm": 3.0018725395202637, "learning_rate": 1e-06, "loss": -0.0804, "step": 835 }, { "clip_ratio/high_max": 0.0032150969636859372, "clip_ratio/high_mean": 0.0012787321593350498, "clip_ratio/low_mean": 0.001153940340373083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024326724815182388, "epoch": 1.9518810148731407, "grad_norm": 0.2656398117542267, "learning_rate": 1e-06, "loss": -0.0807, "step": 836 }, { "clip_ratio/high_max": 0.002971956637338735, "clip_ratio/high_mean": 0.0011926164988835808, "clip_ratio/low_mean": 0.0010007649470935576, "clip_ratio/low_min": 5.093288382340688e-05, "clip_ratio/region_mean": 0.002193381435063202, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3107.0, "completions/mean_length": 1037.7076416015625, "completions/mean_terminated_length": 627.3544311523438, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.9542140565762613, "grad_norm": 3.552687644958496, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 129700515.0, "reward": 0.613839328289032, "reward_std": 0.18362924456596375, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 837 }, { "clip_ratio/high_max": 0.002727187908021733, "clip_ratio/high_mean": 0.0010953459241136443, "clip_ratio/low_mean": 0.0011138769259559922, "clip_ratio/low_min": 8.177565177902579e-05, "clip_ratio/region_mean": 0.0022092228318797424, "epoch": 1.9565470982793818, "grad_norm": 0.4473375082015991, "learning_rate": 1e-06, "loss": -0.0421, "step": 838 }, { "clip_ratio/high_max": 0.003029786064871587, "clip_ratio/high_mean": 0.0011788770534622017, "clip_ratio/low_mean": 0.0012981592080905102, "clip_ratio/low_min": 8.616304876341019e-05, "clip_ratio/region_mean": 0.002477036294294521, "epoch": 1.9588801399825022, "grad_norm": 0.6042472124099731, "learning_rate": 1e-06, "loss": -0.0423, "step": 839 }, { "clip_ratio/high_max": 0.0032297696598106995, "clip_ratio/high_mean": 0.0012182233949715737, "clip_ratio/low_mean": 0.0016185630483960267, "clip_ratio/low_min": 7.21090218576137e-05, "clip_ratio/region_mean": 0.0028367864433676004, "epoch": 1.9612131816856226, "grad_norm": 0.29474377632141113, "learning_rate": 1e-06, "loss": -0.0426, "step": 840 }, { "clip_ratio/high_max": 0.0022612210086663254, "clip_ratio/high_mean": 0.0009708471043268219, "clip_ratio/low_mean": 0.000777071894844994, "clip_ratio/low_min": 6.241679875529371e-05, "clip_ratio/region_mean": 0.0017479190282756463, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 1112.2210693359375, "completions/mean_terminated_length": 641.9096069335938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 1.963546223388743, "grad_norm": 0.6825649738311768, "learning_rate": 1e-06, "loss": -0.0581, "num_tokens": 130287273.0, "reward": 0.5234375, "reward_std": 0.1935087889432907, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 841 }, { "clip_ratio/high_max": 0.003101959817286115, "clip_ratio/high_mean": 0.001174530425487319, "clip_ratio/low_mean": 0.0009283047056669602, "clip_ratio/low_min": 3.573075809981674e-05, "clip_ratio/region_mean": 0.0021028350820415653, "epoch": 1.9658792650918635, "grad_norm": 0.4014720022678375, "learning_rate": 1e-06, "loss": -0.0584, "step": 842 }, { "clip_ratio/high_max": 0.002877331127820071, "clip_ratio/high_mean": 0.0011052786103391554, "clip_ratio/low_mean": 0.0010966240715788445, "clip_ratio/low_min": 8.24038143036887e-05, "clip_ratio/region_mean": 0.002201902614615392, "epoch": 1.968212306794984, "grad_norm": 0.7077798247337341, "learning_rate": 1e-06, "loss": -0.0582, "step": 843 }, { "clip_ratio/high_max": 0.002817540746036684, "clip_ratio/high_mean": 0.001064561341991066, "clip_ratio/low_mean": 0.0013108240127621684, "clip_ratio/low_min": 7.890287452028133e-05, "clip_ratio/region_mean": 0.0023753853238304146, "epoch": 1.9705453484981044, "grad_norm": 390.95001220703125, "learning_rate": 1e-06, "loss": -0.0258, "step": 844 }, { "clip_ratio/high_max": 0.0023503071315644775, "clip_ratio/high_mean": 0.0009436144791834522, "clip_ratio/low_mean": 0.0006548724632011726, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015984869460226037, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2127.0, "completions/mean_length": 1060.759033203125, "completions/mean_terminated_length": 586.8696899414062, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 1.9728783902012248, "grad_norm": 0.41954970359802246, "learning_rate": 1e-06, "loss": -0.0637, "num_tokens": 130826633.0, "reward": 0.5959821939468384, "reward_std": 0.14676883816719055, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 845 }, { "clip_ratio/high_max": 0.0028502366185421124, "clip_ratio/high_mean": 0.0010478959393367404, "clip_ratio/low_mean": 0.0008171688277798239, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018650648125912994, "epoch": 1.9752114319043454, "grad_norm": 0.9147605895996094, "learning_rate": 1e-06, "loss": -0.0639, "step": 846 }, { "clip_ratio/high_max": 0.002952979390101973, "clip_ratio/high_mean": 0.0010682171596272383, "clip_ratio/low_mean": 0.0010566367036517477, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021248539014777634, "epoch": 1.9775444736074657, "grad_norm": 0.3660968542098999, "learning_rate": 1e-06, "loss": -0.0642, "step": 847 }, { "clip_ratio/high_max": 0.002856346509361174, "clip_ratio/high_mean": 0.0010957338181469822, "clip_ratio/low_mean": 0.001170645235106349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022663790223305114, "epoch": 1.9798775153105863, "grad_norm": 0.22567641735076904, "learning_rate": 1e-06, "loss": -0.0643, "step": 848 }, { "clip_ratio/high_max": 0.002421038123429753, "clip_ratio/high_mean": 0.0007911993634479586, "clip_ratio/low_mean": 0.0006830313559476053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014742307175765745, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 1029.943115234375, "completions/mean_terminated_length": 627.3295288085938, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 1.9822105570137065, "grad_norm": 0.3517293930053711, "learning_rate": 1e-06, "loss": -0.0286, "num_tokens": 131416230.0, "reward": 0.5792410969734192, "reward_std": 0.1522556096315384, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 849 }, { "clip_ratio/high_max": 0.003006308150361292, "clip_ratio/high_mean": 0.0010687062531360425, "clip_ratio/low_mean": 0.0008296005926240468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018983068293891847, "epoch": 1.9845435987168272, "grad_norm": 0.4864768981933594, "learning_rate": 1e-06, "loss": -0.0287, "step": 850 }, { "clip_ratio/high_max": 0.002671430294867605, "clip_ratio/high_mean": 0.0009176384555757977, "clip_ratio/low_mean": 0.0010009038560383487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019185422679584008, "epoch": 1.9868766404199474, "grad_norm": 0.2672499716281891, "learning_rate": 1e-06, "loss": -0.0289, "step": 851 }, { "clip_ratio/high_max": 0.002889887931814883, "clip_ratio/high_mean": 0.0010207217528659385, "clip_ratio/low_mean": 0.001248643891813117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002269365664687939, "epoch": 1.989209682123068, "grad_norm": 0.5882120132446289, "learning_rate": 1e-06, "loss": -0.0289, "step": 852 }, { "clip_ratio/high_max": 0.002519931716960855, "clip_ratio/high_mean": 0.0010204063219134696, "clip_ratio/low_mean": 0.0006190942267494393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016395005586673506, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3117.0, "completions/mean_length": 1085.8660888671875, "completions/mean_terminated_length": 629.3162231445312, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 1.9915427238261882, "grad_norm": 0.44835636019706726, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 131989910.0, "reward": 0.559151828289032, "reward_std": 0.1519550383090973, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 853 }, { "clip_ratio/high_max": 0.003041216761630494, "clip_ratio/high_mean": 0.0011810874530056026, "clip_ratio/low_mean": 0.0007203094846772728, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001901396899484098, "epoch": 1.993875765529309, "grad_norm": 0.7178743481636047, "learning_rate": 1e-06, "loss": -0.0602, "step": 854 }, { "clip_ratio/high_max": 0.003188244423654396, "clip_ratio/high_mean": 0.0011266791370871942, "clip_ratio/low_mean": 0.0009192660727421753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002045945220743306, "epoch": 1.9962088072324293, "grad_norm": 0.37658190727233887, "learning_rate": 1e-06, "loss": -0.0606, "step": 855 }, { "clip_ratio/high_max": 0.002952507697045803, "clip_ratio/high_mean": 0.0011015729905921035, "clip_ratio/low_mean": 0.00109630414408457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021978771037538536, "epoch": 1.9985418489355498, "grad_norm": 1.024871587753296, "learning_rate": 1e-06, "loss": -0.0606, "step": 856 }, { "clip_ratio/high_max": 0.0022972002734604757, "clip_ratio/high_mean": 0.0008910921460483223, "clip_ratio/low_mean": 0.0007642098807991715, "clip_ratio/low_min": 5.978519948257599e-05, "clip_ratio/region_mean": 0.0016553020104765892, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 1003.8839721679688, "completions/mean_terminated_length": 671.3572387695312, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.0023330417031207, "grad_norm": 0.42047256231307983, "learning_rate": 1e-06, "loss": -0.0404, "num_tokens": 132621526.0, "reward": 0.5658482313156128, "reward_std": 0.18381400406360626, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 857 }, { "clip_ratio/high_max": 0.003116651721938979, "clip_ratio/high_mean": 0.0011043048070860095, "clip_ratio/low_mean": 0.000985980725090485, "clip_ratio/low_min": 6.42499126115581e-05, "clip_ratio/region_mean": 0.0020902855321764946, "epoch": 2.004666083406241, "grad_norm": 0.3271386921405792, "learning_rate": 1e-06, "loss": -0.0406, "step": 858 }, { "clip_ratio/high_max": 0.003035455862118397, "clip_ratio/high_mean": 0.001133696508986759, "clip_ratio/low_mean": 0.0011873762668983545, "clip_ratio/low_min": 0.00014357243708218448, "clip_ratio/region_mean": 0.002321072774066124, "epoch": 2.0069991251093615, "grad_norm": 0.2741071879863739, "learning_rate": 1e-06, "loss": -0.0408, "step": 859 }, { "clip_ratio/high_max": 0.0024617022208985873, "clip_ratio/high_mean": 0.000986086344710202, "clip_ratio/low_mean": 0.0013637412994285114, "clip_ratio/low_min": 0.00013399829367699567, "clip_ratio/region_mean": 0.0023498276932514273, "epoch": 2.0093321668124817, "grad_norm": 0.2776053845882416, "learning_rate": 1e-06, "loss": -0.0408, "step": 860 }, { "clip_ratio/high_max": 0.0022610429587075487, "clip_ratio/high_mean": 0.0006993085316935321, "clip_ratio/low_mean": 0.0006205726785992738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013198812121117953, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3404.0, "completions/mean_length": 1145.7154541015625, "completions/mean_terminated_length": 649.513671875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 2.0116652085156024, "grad_norm": 1.6499103307724, "learning_rate": 1e-06, "loss": -0.0341, "num_tokens": 133209383.0, "reward": 0.5323660969734192, "reward_std": 0.1278291642665863, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 861 }, { "clip_ratio/high_max": 0.002319060113222804, "clip_ratio/high_mean": 0.0007738856302239583, "clip_ratio/low_mean": 0.0008351304932148196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016090160788735375, "epoch": 2.0139982502187226, "grad_norm": 0.7466989159584045, "learning_rate": 1e-06, "loss": -0.0343, "step": 862 }, { "clip_ratio/high_max": 0.0028512449134723283, "clip_ratio/high_mean": 0.0008810326507955324, "clip_ratio/low_mean": 0.0009840381899266504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018650707934284583, "epoch": 2.0163312919218432, "grad_norm": 0.24768130481243134, "learning_rate": 1e-06, "loss": -0.0345, "step": 863 }, { "clip_ratio/high_max": 0.0025711729540489614, "clip_ratio/high_mean": 0.000823824080725899, "clip_ratio/low_mean": 0.0010412318297312595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018650559140951373, "epoch": 2.0186643336249634, "grad_norm": 0.23575639724731445, "learning_rate": 1e-06, "loss": -0.0346, "step": 864 }, { "clip_ratio/high_max": 0.002272085410368163, "clip_ratio/high_mean": 0.0008576683685532771, "clip_ratio/low_mean": 0.0007864589861128479, "clip_ratio/low_min": 1.578681440150831e-05, "clip_ratio/region_mean": 0.0016441273801319767, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3673.0, "completions/mean_length": 1128.9296875, "completions/mean_terminated_length": 611.7339477539062, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 2.020997375328084, "grad_norm": 1.055468201637268, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 133769832.0, "reward": 0.5491071939468384, "reward_std": 0.15781274437904358, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 865 }, { "clip_ratio/high_max": 0.002570376691437559, "clip_ratio/high_mean": 0.0009735539779285318, "clip_ratio/low_mean": 0.0010250855302729178, "clip_ratio/low_min": 6.381457933457568e-05, "clip_ratio/region_mean": 0.001998639512748923, "epoch": 2.0233304170312043, "grad_norm": 28.193599700927734, "learning_rate": 1e-06, "loss": -0.0687, "step": 866 }, { "clip_ratio/high_max": 0.002646934997756034, "clip_ratio/high_mean": 0.0009740409586811438, "clip_ratio/low_mean": 0.0009820285649766447, "clip_ratio/low_min": 2.5525832825223915e-05, "clip_ratio/region_mean": 0.0019560695145628415, "epoch": 2.025663458734325, "grad_norm": 0.3134438991546631, "learning_rate": 1e-06, "loss": -0.0695, "step": 867 }, { "clip_ratio/high_max": 0.0028016765354550444, "clip_ratio/high_mean": 0.0010023872637248132, "clip_ratio/low_mean": 0.0012704570035566576, "clip_ratio/low_min": 5.7077624660450965e-05, "clip_ratio/region_mean": 0.002272844227263704, "epoch": 2.027996500437445, "grad_norm": 0.487965852022171, "learning_rate": 1e-06, "loss": -0.0695, "step": 868 }, { "clip_ratio/high_max": 0.002343198560993187, "clip_ratio/high_mean": 0.0008524865024810424, "clip_ratio/low_mean": 0.0005947178415226517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014472043330897577, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 1147.9754638671875, "completions/mean_terminated_length": 713.8873291015625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.030329542140566, "grad_norm": 2.323728561401367, "learning_rate": 1e-06, "loss": -0.0348, "num_tokens": 134418690.0, "reward": 0.478794664144516, "reward_std": 0.15420952439308167, "rewards/verify_math_reward/mean": 0.4787946343421936, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 869 }, { "clip_ratio/high_max": 0.002693087670195382, "clip_ratio/high_mean": 0.0009041933644766686, "clip_ratio/low_mean": 0.0006407927812688285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015449861530214548, "epoch": 2.032662583843686, "grad_norm": 3.198934555053711, "learning_rate": 1e-06, "loss": -0.0351, "step": 870 }, { "clip_ratio/high_max": 0.0028196720595587976, "clip_ratio/high_mean": 0.0010032593236246612, "clip_ratio/low_mean": 0.0006453340656662476, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016485933738294989, "epoch": 2.0349956255468067, "grad_norm": 0.27979639172554016, "learning_rate": 1e-06, "loss": -0.0352, "step": 871 }, { "clip_ratio/high_max": 0.0024981860915431753, "clip_ratio/high_mean": 0.0009181688674289035, "clip_ratio/low_mean": 0.0008215358593588462, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017397047122358344, "epoch": 2.037328667249927, "grad_norm": 0.21909157931804657, "learning_rate": 1e-06, "loss": -0.0353, "step": 872 }, { "clip_ratio/high_max": 0.0025199791489285417, "clip_ratio/high_mean": 0.0011224939335079398, "clip_ratio/low_mean": 0.0007527473590016598, "clip_ratio/low_min": 1.2852148756792303e-05, "clip_ratio/region_mean": 0.001875241294328589, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3765.0, "completions/mean_length": 1038.0625, "completions/mean_terminated_length": 587.7900390625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.0396617089530475, "grad_norm": 0.4831615388393402, "learning_rate": 1e-06, "loss": -0.0635, "num_tokens": 134965098.0, "reward": 0.6316964626312256, "reward_std": 0.18404366075992584, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 873 }, { "clip_ratio/high_max": 0.002874345504096709, "clip_ratio/high_mean": 0.0013199570712458808, "clip_ratio/low_mean": 0.0009785580241441494, "clip_ratio/low_min": 2.0498524463619106e-05, "clip_ratio/region_mean": 0.0022985150644672103, "epoch": 2.041994750656168, "grad_norm": 0.34790271520614624, "learning_rate": 1e-06, "loss": -0.064, "step": 874 }, { "clip_ratio/high_max": 0.0029040189983788878, "clip_ratio/high_mean": 0.0013257676801003981, "clip_ratio/low_mean": 0.001177882579213474, "clip_ratio/low_min": 1.3045293599134311e-05, "clip_ratio/region_mean": 0.0025036502120201476, "epoch": 2.0443277923592884, "grad_norm": 1.2810522317886353, "learning_rate": 1e-06, "loss": -0.0641, "step": 875 }, { "clip_ratio/high_max": 0.0030651925335405394, "clip_ratio/high_mean": 0.0013183013998059323, "clip_ratio/low_mean": 0.0012874302665295545, "clip_ratio/low_min": 6.223859963938594e-05, "clip_ratio/region_mean": 0.0026057316063088365, "epoch": 2.046660834062409, "grad_norm": 0.3733972907066345, "learning_rate": 1e-06, "loss": -0.0642, "step": 876 }, { "clip_ratio/high_max": 0.002328331167518627, "clip_ratio/high_mean": 0.0009402452069480205, "clip_ratio/low_mean": 0.0006176343331389944, "clip_ratio/low_min": 2.8363965611788444e-05, "clip_ratio/region_mean": 0.0015578795500914566, "completions/clipped_ratio": 0.1953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3666.0, "completions/mean_length": 1343.357177734375, "completions/mean_terminated_length": 675.239990234375, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 2.0489938757655293, "grad_norm": 0.3365660309791565, "learning_rate": 1e-06, "loss": -0.1069, "num_tokens": 135546482.0, "reward": 0.5212053656578064, "reward_std": 0.18449324369430542, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 877 }, { "clip_ratio/high_max": 0.0029872300510760397, "clip_ratio/high_mean": 0.001092728318326408, "clip_ratio/low_mean": 0.0008013987180675031, "clip_ratio/low_min": 2.712462082854472e-05, "clip_ratio/region_mean": 0.0018941270463983528, "epoch": 2.05132691746865, "grad_norm": 0.982396125793457, "learning_rate": 1e-06, "loss": -0.1072, "step": 878 }, { "clip_ratio/high_max": 0.0029425177417579107, "clip_ratio/high_mean": 0.0011285567888990045, "clip_ratio/low_mean": 0.0009221152422469459, "clip_ratio/low_min": 7.703093979216646e-05, "clip_ratio/region_mean": 0.0020506720611592755, "epoch": 2.05365995917177, "grad_norm": 0.3089510202407837, "learning_rate": 1e-06, "loss": -0.1073, "step": 879 }, { "clip_ratio/high_max": 0.0029422525403788313, "clip_ratio/high_mean": 0.0011148026533192024, "clip_ratio/low_mean": 0.001091067348170327, "clip_ratio/low_min": 5.300989778334042e-05, "clip_ratio/region_mean": 0.0022058699832996354, "epoch": 2.055993000874891, "grad_norm": 0.29226964712142944, "learning_rate": 1e-06, "loss": -0.1075, "step": 880 }, { "clip_ratio/high_max": 0.0021294680118444376, "clip_ratio/high_mean": 0.0008094057611742755, "clip_ratio/low_mean": 0.0006546680024257512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014640737681475002, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 1087.4498291015625, "completions/mean_terminated_length": 617.727783203125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.058326042578011, "grad_norm": 0.2894868850708008, "learning_rate": 1e-06, "loss": -0.032, "num_tokens": 136113133.0, "reward": 0.5870535969734192, "reward_std": 0.12787306308746338, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 881 }, { "clip_ratio/high_max": 0.002647999492182862, "clip_ratio/high_mean": 0.0009786869995878078, "clip_ratio/low_mean": 0.0007038409949018387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001682527974480763, "epoch": 2.0606590842811316, "grad_norm": 0.5084614753723145, "learning_rate": 1e-06, "loss": -0.032, "step": 882 }, { "clip_ratio/high_max": 0.002453728797263466, "clip_ratio/high_mean": 0.0009199765754601685, "clip_ratio/low_mean": 0.0009904046110023046, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001910381171910558, "epoch": 2.062992125984252, "grad_norm": 0.26263660192489624, "learning_rate": 1e-06, "loss": -0.0323, "step": 883 }, { "clip_ratio/high_max": 0.002588728446426103, "clip_ratio/high_mean": 0.0008708847890375182, "clip_ratio/low_mean": 0.0010661948272172594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019370796144357882, "epoch": 2.0653251676873725, "grad_norm": 0.32889029383659363, "learning_rate": 1e-06, "loss": -0.0323, "step": 884 }, { "clip_ratio/high_max": 0.0027362724576960318, "clip_ratio/high_mean": 0.0010770971148303943, "clip_ratio/low_mean": 0.0008861806618369883, "clip_ratio/low_min": 4.844963950745296e-05, "clip_ratio/region_mean": 0.001963277791219298, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 1160.83935546875, "completions/mean_terminated_length": 667.1812133789062, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 2.0676582093904927, "grad_norm": 0.39705681800842285, "learning_rate": 1e-06, "loss": -0.0709, "num_tokens": 136716509.0, "reward": 0.5234375, "reward_std": 0.18870417773723602, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 885 }, { "clip_ratio/high_max": 0.0028556392644532025, "clip_ratio/high_mean": 0.0011938789175474085, "clip_ratio/low_mean": 0.0011080631993536372, "clip_ratio/low_min": 0.00010669686616893159, "clip_ratio/region_mean": 0.0023019420987111516, "epoch": 2.0699912510936134, "grad_norm": 0.6972605586051941, "learning_rate": 1e-06, "loss": -0.0711, "step": 886 }, { "clip_ratio/high_max": 0.002851308454410173, "clip_ratio/high_mean": 0.0011980074850725941, "clip_ratio/low_mean": 0.0012975577010365669, "clip_ratio/low_min": 0.00012064987276971806, "clip_ratio/region_mean": 0.0024955651533673517, "epoch": 2.0723242927967336, "grad_norm": 0.2827794849872589, "learning_rate": 1e-06, "loss": -0.0713, "step": 887 }, { "clip_ratio/high_max": 0.002522249997127801, "clip_ratio/high_mean": 0.0010886837881116662, "clip_ratio/low_mean": 0.0014571396604878828, "clip_ratio/low_min": 0.00012304454139666632, "clip_ratio/region_mean": 0.0025458234740654007, "epoch": 2.0746573344998542, "grad_norm": 0.3214990794658661, "learning_rate": 1e-06, "loss": -0.0713, "step": 888 }, { "clip_ratio/high_max": 0.003073689731536433, "clip_ratio/high_mean": 0.0012315029161982238, "clip_ratio/low_mean": 0.0008666165449540131, "clip_ratio/low_min": 7.985185584402643e-05, "clip_ratio/region_mean": 0.002098119461152237, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3720.0, "completions/mean_length": 1081.9342041015625, "completions/mean_terminated_length": 651.3533325195312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.0769903762029744, "grad_norm": 0.36969879269599915, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 137328290.0, "reward": 0.5814732313156128, "reward_std": 0.19453300535678864, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 889 }, { "clip_ratio/high_max": 0.003361968971148599, "clip_ratio/high_mean": 0.001345498429145664, "clip_ratio/low_mean": 0.0011420434620958986, "clip_ratio/low_min": 6.100734753999859e-05, "clip_ratio/region_mean": 0.0024875418675947003, "epoch": 2.079323417906095, "grad_norm": 0.3817501366138458, "learning_rate": 1e-06, "loss": -0.0396, "step": 890 }, { "clip_ratio/high_max": 0.0037169754068600014, "clip_ratio/high_mean": 0.001399846762069501, "clip_ratio/low_mean": 0.0012452459468477173, "clip_ratio/low_min": 0.000101055155028007, "clip_ratio/region_mean": 0.0026450927180121653, "epoch": 2.0816564596092153, "grad_norm": 0.2818514406681061, "learning_rate": 1e-06, "loss": -0.0397, "step": 891 }, { "clip_ratio/high_max": 0.0036122812307439744, "clip_ratio/high_mean": 0.0013823737299389904, "clip_ratio/low_mean": 0.0014095221431489335, "clip_ratio/low_min": 9.563969615555834e-05, "clip_ratio/region_mean": 0.0027918958439840935, "epoch": 2.083989501312336, "grad_norm": 0.35881802439689636, "learning_rate": 1e-06, "loss": -0.0397, "step": 892 }, { "clip_ratio/high_max": 0.0031301029739552177, "clip_ratio/high_mean": 0.0012524443245638395, "clip_ratio/low_mean": 0.0007899156134953955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002042359941697214, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3819.0, "completions/mean_length": 1083.1138916015625, "completions/mean_terminated_length": 678.8531494140625, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 2.0863225430154566, "grad_norm": 0.5770424008369446, "learning_rate": 1e-06, "loss": -0.0555, "num_tokens": 137950288.0, "reward": 0.637276828289032, "reward_std": 0.18295595049858093, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 893 }, { "clip_ratio/high_max": 0.0033769789297366515, "clip_ratio/high_mean": 0.0013574555196100846, "clip_ratio/low_mean": 0.0011881534173880937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025456089279032312, "epoch": 2.088655584718577, "grad_norm": 1.3119981288909912, "learning_rate": 1e-06, "loss": -0.0555, "step": 894 }, { "clip_ratio/high_max": 0.003853984708257485, "clip_ratio/high_mean": 0.0015149862811085768, "clip_ratio/low_mean": 0.0013871388146071695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029021251102676615, "epoch": 2.0909886264216975, "grad_norm": 0.4202233552932739, "learning_rate": 1e-06, "loss": -0.0559, "step": 895 }, { "clip_ratio/high_max": 0.00342563955928199, "clip_ratio/high_mean": 0.0014018227720953291, "clip_ratio/low_mean": 0.0015406980019179173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029425206885207444, "epoch": 2.0933216681248177, "grad_norm": 0.5590657591819763, "learning_rate": 1e-06, "loss": -0.056, "step": 896 }, { "clip_ratio/high_max": 0.003143565889331512, "clip_ratio/high_mean": 0.0011638165815384127, "clip_ratio/low_mean": 0.001134115146669501, "clip_ratio/low_min": 3.249385190429166e-05, "clip_ratio/region_mean": 0.0022979317291174084, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 1303.9163818359375, "completions/mean_terminated_length": 669.004150390625, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 2.0956547098279383, "grad_norm": 4.953127384185791, "learning_rate": 1e-06, "loss": -0.0716, "num_tokens": 138537069.0, "reward": 0.4810267984867096, "reward_std": 0.18881545960903168, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991899728775024, "step": 897 }, { "clip_ratio/high_max": 0.002721304634178523, "clip_ratio/high_mean": 0.0010936348080576863, "clip_ratio/low_mean": 0.0012985966895939782, "clip_ratio/low_min": 2.170515654142946e-05, "clip_ratio/region_mean": 0.00239223145035794, "epoch": 2.0979877515310585, "grad_norm": 0.6160867214202881, "learning_rate": 1e-06, "loss": -0.0719, "step": 898 }, { "clip_ratio/high_max": 0.0039855076756794006, "clip_ratio/high_mean": 0.001341547798801912, "clip_ratio/low_mean": 0.0015091649947862606, "clip_ratio/low_min": 4.334643017500639e-05, "clip_ratio/region_mean": 0.0028507128154160455, "epoch": 2.100320793234179, "grad_norm": 0.46486419439315796, "learning_rate": 1e-06, "loss": -0.0722, "step": 899 }, { "clip_ratio/high_max": 0.0038218230183701962, "clip_ratio/high_mean": 0.0013429634236672428, "clip_ratio/low_mean": 0.00160860802498064, "clip_ratio/low_min": 1.4220705452316906e-05, "clip_ratio/region_mean": 0.0029515714340959676, "epoch": 2.1026538349372994, "grad_norm": 0.2979956567287445, "learning_rate": 1e-06, "loss": -0.0725, "step": 900 }, { "clip_ratio/high_max": 0.002866292583348695, "clip_ratio/high_mean": 0.0011299274628981948, "clip_ratio/low_mean": 0.0007768942887196317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019068217588937841, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 1260.864990234375, "completions/mean_terminated_length": 690.797607421875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 2.10498687664042, "grad_norm": 26.61112403869629, "learning_rate": 1e-06, "loss": -0.075, "num_tokens": 139144252.0, "reward": 0.5412946939468384, "reward_std": 0.1878051608800888, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 901 }, { "clip_ratio/high_max": 0.0027768110303441063, "clip_ratio/high_mean": 0.0011122915311716497, "clip_ratio/low_mean": 0.0007727429547230713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018850345149985515, "epoch": 2.1073199183435403, "grad_norm": 0.4404367506504059, "learning_rate": 1e-06, "loss": -0.0766, "step": 902 }, { "clip_ratio/high_max": 0.0036372511167428456, "clip_ratio/high_mean": 0.0012605273550434504, "clip_ratio/low_mean": 0.0009772507110028528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022377780769602396, "epoch": 2.109652960046661, "grad_norm": 1.0542540550231934, "learning_rate": 1e-06, "loss": -0.0768, "step": 903 }, { "clip_ratio/high_max": 0.00320668300264515, "clip_ratio/high_mean": 0.0012399408151395619, "clip_ratio/low_mean": 0.0012102037871954963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024501445805071853, "epoch": 2.111986001749781, "grad_norm": 0.3541455864906311, "learning_rate": 1e-06, "loss": -0.0771, "step": 904 }, { "clip_ratio/high_max": 0.002565014801803045, "clip_ratio/high_mean": 0.0009630673084757291, "clip_ratio/low_mean": 0.0007278864204636193, "clip_ratio/low_min": 7.717625703662634e-05, "clip_ratio/region_mean": 0.0016909537080209702, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 1066.4910888671875, "completions/mean_terminated_length": 660.0, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 2.114319043452902, "grad_norm": 0.2920326590538025, "learning_rate": 1e-06, "loss": -0.0598, "num_tokens": 139748404.0, "reward": 0.6171875, "reward_std": 0.17659901082515717, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 905 }, { "clip_ratio/high_max": 0.0028116988978581503, "clip_ratio/high_mean": 0.0010232415843347553, "clip_ratio/low_mean": 0.0009304448485636385, "clip_ratio/low_min": 4.0244689444079995e-05, "clip_ratio/region_mean": 0.00195368638378568, "epoch": 2.116652085156022, "grad_norm": 1.7059730291366577, "learning_rate": 1e-06, "loss": -0.0599, "step": 906 }, { "clip_ratio/high_max": 0.0028795163962058723, "clip_ratio/high_mean": 0.0011422177121858113, "clip_ratio/low_mean": 0.0011000340036844136, "clip_ratio/low_min": 6.772319102310576e-05, "clip_ratio/region_mean": 0.0022422516703954898, "epoch": 2.1189851268591426, "grad_norm": 0.5355476140975952, "learning_rate": 1e-06, "loss": -0.06, "step": 907 }, { "clip_ratio/high_max": 0.0029035531188128516, "clip_ratio/high_mean": 0.001097669781302102, "clip_ratio/low_mean": 0.001237262287759222, "clip_ratio/low_min": 0.00010106210902449675, "clip_ratio/region_mean": 0.0023349320399574935, "epoch": 2.121318168562263, "grad_norm": 1.1047362089157104, "learning_rate": 1e-06, "loss": -0.0599, "step": 908 }, { "clip_ratio/high_max": 0.002512164239306003, "clip_ratio/high_mean": 0.0010870905352931004, "clip_ratio/low_mean": 0.0005407030885180575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016277936301776208, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3887.0, "completions/mean_length": 1154.5223388671875, "completions/mean_terminated_length": 637.2545776367188, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 2.1236512102653835, "grad_norm": 0.426114559173584, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 140322400.0, "reward": 0.582589328289032, "reward_std": 0.17833246290683746, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 909 }, { "clip_ratio/high_max": 0.002888229275413323, "clip_ratio/high_mean": 0.0013756450243818108, "clip_ratio/low_mean": 0.0006492569445981644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002024901994445827, "epoch": 2.1259842519685037, "grad_norm": 0.4038192331790924, "learning_rate": 1e-06, "loss": -0.0494, "step": 910 }, { "clip_ratio/high_max": 0.00259706854558317, "clip_ratio/high_mean": 0.0012387173956085462, "clip_ratio/low_mean": 0.0008468953660667466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002085612803057302, "epoch": 2.1283172936716244, "grad_norm": 0.3161894679069519, "learning_rate": 1e-06, "loss": -0.0495, "step": 911 }, { "clip_ratio/high_max": 0.002489079619408585, "clip_ratio/high_mean": 0.0011988411824859213, "clip_ratio/low_mean": 0.0009604899560144986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021593311757897027, "epoch": 2.130650335374745, "grad_norm": 0.24112068116664886, "learning_rate": 1e-06, "loss": -0.0496, "step": 912 }, { "clip_ratio/high_max": 0.0020600908756023273, "clip_ratio/high_mean": 0.0008546238823328167, "clip_ratio/low_mean": 0.0006376872916007414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014923111448297277, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3788.0, "completions/mean_length": 1270.997802734375, "completions/mean_terminated_length": 684.676513671875, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 2.1329833770778652, "grad_norm": 1.5914798974990845, "learning_rate": 1e-06, "loss": -0.0628, "num_tokens": 140925366.0, "reward": 0.5334821939468384, "reward_std": 0.162998229265213, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 913 }, { "clip_ratio/high_max": 0.002620365434268024, "clip_ratio/high_mean": 0.0009853904630290344, "clip_ratio/low_mean": 0.0007978257672220934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017832162047852762, "epoch": 2.135316418780986, "grad_norm": 0.37435391545295715, "learning_rate": 1e-06, "loss": -0.0629, "step": 914 }, { "clip_ratio/high_max": 0.0027319670989527367, "clip_ratio/high_mean": 0.0009977128138416447, "clip_ratio/low_mean": 0.0009686973935458809, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001966410221939441, "epoch": 2.137649460484106, "grad_norm": 0.9419366717338562, "learning_rate": 1e-06, "loss": -0.0631, "step": 915 }, { "clip_ratio/high_max": 0.0029081570537528023, "clip_ratio/high_mean": 0.001095517127396306, "clip_ratio/low_mean": 0.0011299456764390925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022254628347582184, "epoch": 2.1399825021872267, "grad_norm": 0.44014012813568115, "learning_rate": 1e-06, "loss": -0.0632, "step": 916 }, { "clip_ratio/high_max": 0.002777444227831438, "clip_ratio/high_mean": 0.001154653627963853, "clip_ratio/low_mean": 0.0006237555753614288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017784091687644832, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2400.0, "completions/mean_length": 1098.6663818359375, "completions/mean_terminated_length": 608.1934814453125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 2.142315543890347, "grad_norm": 1.04115891456604, "learning_rate": 1e-06, "loss": -0.1243, "num_tokens": 141479267.0, "reward": 0.5758928656578064, "reward_std": 0.1742018610239029, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 917 }, { "clip_ratio/high_max": 0.002884137866203673, "clip_ratio/high_mean": 0.0012699403687292943, "clip_ratio/low_mean": 0.0008146530153680942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020845933904638514, "epoch": 2.1446485855934676, "grad_norm": 0.29865145683288574, "learning_rate": 1e-06, "loss": -0.1247, "step": 918 }, { "clip_ratio/high_max": 0.002716042145038955, "clip_ratio/high_mean": 0.0011536991405591834, "clip_ratio/low_mean": 0.0009777492778084707, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002131448440195527, "epoch": 2.146981627296588, "grad_norm": 0.30186283588409424, "learning_rate": 1e-06, "loss": -0.1247, "step": 919 }, { "clip_ratio/high_max": 0.002865830894734245, "clip_ratio/high_mean": 0.001174216034996789, "clip_ratio/low_mean": 0.0011046643448935356, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022788804344600067, "epoch": 2.1493146689997085, "grad_norm": 0.35737496614456177, "learning_rate": 1e-06, "loss": -0.1249, "step": 920 }, { "clip_ratio/high_max": 0.0023828887296986068, "clip_ratio/high_mean": 0.0007726374788035173, "clip_ratio/low_mean": 0.0005882314080736251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013608688677777536, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3690.0, "completions/mean_length": 1091.84716796875, "completions/mean_terminated_length": 653.9015502929688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.1516477107028287, "grad_norm": 3.5590226650238037, "learning_rate": 1e-06, "loss": -0.0304, "num_tokens": 142080354.0, "reward": 0.5714285969734192, "reward_std": 0.14766673743724823, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 921 }, { "clip_ratio/high_max": 0.0027588307566475123, "clip_ratio/high_mean": 0.0009729995508678257, "clip_ratio/low_mean": 0.0007157287545851432, "clip_ratio/low_min": 1.0151047717954498e-05, "clip_ratio/region_mean": 0.0016887282763491385, "epoch": 2.1539807524059493, "grad_norm": 0.3675619065761566, "learning_rate": 1e-06, "loss": -0.0308, "step": 922 }, { "clip_ratio/high_max": 0.00287230185494991, "clip_ratio/high_mean": 0.0010135241027455777, "clip_ratio/low_mean": 0.0008363535544049228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018498776043998078, "epoch": 2.1563137941090695, "grad_norm": 0.3827384114265442, "learning_rate": 1e-06, "loss": -0.0308, "step": 923 }, { "clip_ratio/high_max": 0.00296986433386337, "clip_ratio/high_mean": 0.0009908454630931374, "clip_ratio/low_mean": 0.0010255462584609631, "clip_ratio/low_min": 1.0151047717954498e-05, "clip_ratio/region_mean": 0.002016391670622397, "epoch": 2.15864683581219, "grad_norm": 0.2588915526866913, "learning_rate": 1e-06, "loss": -0.0309, "step": 924 }, { "clip_ratio/high_max": 0.0021203413743933197, "clip_ratio/high_mean": 0.0007769243311486207, "clip_ratio/low_mean": 0.0007277360527950805, "clip_ratio/low_min": 5.1867220463464037e-05, "clip_ratio/region_mean": 0.0015046603948576376, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3310.0, "completions/mean_length": 1341.591552734375, "completions/mean_terminated_length": 705.9588012695312, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.1609798775153104, "grad_norm": 3.8680458068847656, "learning_rate": 1e-06, "loss": -0.0422, "num_tokens": 142684668.0, "reward": 0.5, "reward_std": 0.16645877063274384, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 925 }, { "clip_ratio/high_max": 0.002306196249264758, "clip_ratio/high_mean": 0.0008120376551232766, "clip_ratio/low_mean": 0.0007399147925752914, "clip_ratio/low_min": 4.610844553099014e-05, "clip_ratio/region_mean": 0.0015519524495175574, "epoch": 2.163312919218431, "grad_norm": 0.609117329120636, "learning_rate": 1e-06, "loss": -0.0442, "step": 926 }, { "clip_ratio/high_max": 0.0022493886644952, "clip_ratio/high_mean": 0.0008440758592769271, "clip_ratio/low_mean": 0.0009111381314141909, "clip_ratio/low_min": 2.5933610231732018e-05, "clip_ratio/region_mean": 0.0017552139979670756, "epoch": 2.1656459609215517, "grad_norm": 0.270142525434494, "learning_rate": 1e-06, "loss": -0.0445, "step": 927 }, { "clip_ratio/high_max": 0.0022418977387133054, "clip_ratio/high_mean": 0.0008723206592549104, "clip_ratio/low_mean": 0.0010157493579754373, "clip_ratio/low_min": 4.610844553099014e-05, "clip_ratio/region_mean": 0.0018880699717556126, "epoch": 2.167979002624672, "grad_norm": 0.22420582175254822, "learning_rate": 1e-06, "loss": -0.0445, "step": 928 }, { "clip_ratio/high_max": 0.002526821983337868, "clip_ratio/high_mean": 0.0009691499071777798, "clip_ratio/low_mean": 0.0008302801270474447, "clip_ratio/low_min": 5.122707807458937e-05, "clip_ratio/region_mean": 0.0017994300287682563, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3992.0, "completions/mean_length": 1165.0457763671875, "completions/mean_terminated_length": 711.805419921875, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 2.1703120443277926, "grad_norm": 0.5007263422012329, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 143328293.0, "reward": 0.4955357313156128, "reward_std": 0.19012564420700073, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 929 }, { "clip_ratio/high_max": 0.0026338091993238777, "clip_ratio/high_mean": 0.0010466624753462384, "clip_ratio/low_mean": 0.0009894379363686312, "clip_ratio/low_min": 6.242230119823944e-05, "clip_ratio/region_mean": 0.00203610047174152, "epoch": 2.1726450860309128, "grad_norm": 0.4224990904331207, "learning_rate": 1e-06, "loss": -0.0412, "step": 930 }, { "clip_ratio/high_max": 0.002590185460576322, "clip_ratio/high_mean": 0.0010549045582592953, "clip_ratio/low_mean": 0.0011856964156322647, "clip_ratio/low_min": 0.00011555532182683237, "clip_ratio/region_mean": 0.0022406009811675176, "epoch": 2.1749781277340334, "grad_norm": 0.32505133748054504, "learning_rate": 1e-06, "loss": -0.0413, "step": 931 }, { "clip_ratio/high_max": 0.0025145736799458973, "clip_ratio/high_mean": 0.0010291184498782968, "clip_ratio/low_mean": 0.0012310733272897778, "clip_ratio/low_min": 8.019193501240807e-05, "clip_ratio/region_mean": 0.002260191788082011, "epoch": 2.1773111694371536, "grad_norm": 0.23753145337104797, "learning_rate": 1e-06, "loss": -0.0414, "step": 932 }, { "clip_ratio/high_max": 0.0027206890808884054, "clip_ratio/high_mean": 0.0009410209513589507, "clip_ratio/low_mean": 0.0006591842275156523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016002051779651083, "completions/clipped_ratio": 0.1796875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 1268.82373046875, "completions/mean_terminated_length": 649.5374145507812, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.1796442111402743, "grad_norm": 0.35164058208465576, "learning_rate": 1e-06, "loss": -0.0848, "num_tokens": 143912967.0, "reward": 0.455357164144516, "reward_std": 0.17036296427249908, "rewards/verify_math_reward/mean": 0.4553571343421936, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 933 }, { "clip_ratio/high_max": 0.003341510375321377, "clip_ratio/high_mean": 0.001119890983318328, "clip_ratio/low_mean": 0.0008933594799600542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002013250435993541, "epoch": 2.1819772528433945, "grad_norm": 0.5610590577125549, "learning_rate": 1e-06, "loss": -0.0889, "step": 934 }, { "clip_ratio/high_max": 0.0033260092313867062, "clip_ratio/high_mean": 0.001190822495118482, "clip_ratio/low_mean": 0.0010441812337376177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002235003739770036, "epoch": 2.184310294546515, "grad_norm": 0.23199562728405, "learning_rate": 1e-06, "loss": -0.0893, "step": 935 }, { "clip_ratio/high_max": 0.0033603518750169314, "clip_ratio/high_mean": 0.0011026296742784325, "clip_ratio/low_mean": 0.001212208057040698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002314837765879929, "epoch": 2.1866433362496354, "grad_norm": 0.24544578790664673, "learning_rate": 1e-06, "loss": -0.0894, "step": 936 }, { "clip_ratio/high_max": 0.0032584453947492875, "clip_ratio/high_mean": 0.001269403968763072, "clip_ratio/low_mean": 0.0008361640311704832, "clip_ratio/low_min": 1.091703052225057e-05, "clip_ratio/region_mean": 0.002105567982653156, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 1076.59716796875, "completions/mean_terminated_length": 605.1806640625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.188976377952756, "grad_norm": 0.775518000125885, "learning_rate": 1e-06, "loss": -0.0789, "num_tokens": 144469278.0, "reward": 0.582589328289032, "reward_std": 0.2142850011587143, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 937 }, { "clip_ratio/high_max": 0.003595775007852353, "clip_ratio/high_mean": 0.001531794448965229, "clip_ratio/low_mean": 0.001094345254387008, "clip_ratio/low_min": 2.183406104450114e-05, "clip_ratio/region_mean": 0.0026261397724738345, "epoch": 2.1913094196558762, "grad_norm": 0.5288658142089844, "learning_rate": 1e-06, "loss": -0.0793, "step": 938 }, { "clip_ratio/high_max": 0.0033775453484850004, "clip_ratio/high_mean": 0.001376819967845222, "clip_ratio/low_mean": 0.0011632960122369695, "clip_ratio/low_min": 1.091703052225057e-05, "clip_ratio/region_mean": 0.00254011604556581, "epoch": 2.193642461358997, "grad_norm": 0.45852425694465637, "learning_rate": 1e-06, "loss": -0.0793, "step": 939 }, { "clip_ratio/high_max": 0.00342442280089017, "clip_ratio/high_mean": 0.0013763813585683238, "clip_ratio/low_mean": 0.0014339402077894192, "clip_ratio/low_min": 3.078059671679512e-05, "clip_ratio/region_mean": 0.002810321602737531, "epoch": 2.195975503062117, "grad_norm": 0.34406086802482605, "learning_rate": 1e-06, "loss": -0.0797, "step": 940 }, { "clip_ratio/high_max": 0.0025815042899921536, "clip_ratio/high_mean": 0.0010002974195231218, "clip_ratio/low_mean": 0.0007481397087758523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017484371273894794, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3450.0, "completions/mean_length": 1134.72998046875, "completions/mean_terminated_length": 632.1644897460938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.1983085447652377, "grad_norm": 0.34119758009910583, "learning_rate": 1e-06, "loss": -0.0515, "num_tokens": 145042948.0, "reward": 0.5636160969734192, "reward_std": 0.17521032691001892, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 941 }, { "clip_ratio/high_max": 0.003196331519575324, "clip_ratio/high_mean": 0.001142490704296506, "clip_ratio/low_mean": 0.0009556240556776174, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002098114728141809, "epoch": 2.200641586468358, "grad_norm": 0.41992729902267456, "learning_rate": 1e-06, "loss": -0.0518, "step": 942 }, { "clip_ratio/high_max": 0.0030432302519329824, "clip_ratio/high_mean": 0.001125496133681736, "clip_ratio/low_mean": 0.001110894247176475, "clip_ratio/low_min": 1.5582148989778943e-05, "clip_ratio/region_mean": 0.0022363903844961897, "epoch": 2.2029746281714786, "grad_norm": 0.35792219638824463, "learning_rate": 1e-06, "loss": -0.0519, "step": 943 }, { "clip_ratio/high_max": 0.002719769734540023, "clip_ratio/high_mean": 0.0011210516022401862, "clip_ratio/low_mean": 0.001246835874098906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023678875077166595, "epoch": 2.205307669874599, "grad_norm": 0.3366990387439728, "learning_rate": 1e-06, "loss": -0.0519, "step": 944 }, { "clip_ratio/high_max": 0.002989219967275858, "clip_ratio/high_mean": 0.000903419768292224, "clip_ratio/low_mean": 0.0006272427162912209, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015306624918594025, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3287.0, "completions/mean_length": 1173.298095703125, "completions/mean_terminated_length": 668.3285522460938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 2.2076407115777195, "grad_norm": 0.40508246421813965, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 145636959.0, "reward": 0.5100446939468384, "reward_std": 0.17043642699718475, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 945 }, { "clip_ratio/high_max": 0.002794872740196297, "clip_ratio/high_mean": 0.0008921482931327773, "clip_ratio/low_mean": 0.0007965968670760049, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016887451492948458, "epoch": 2.20997375328084, "grad_norm": 52.40668487548828, "learning_rate": 1e-06, "loss": -0.0498, "step": 946 }, { "clip_ratio/high_max": 0.002624275271955412, "clip_ratio/high_mean": 0.0008751920104259625, "clip_ratio/low_mean": 0.0007346904876612825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016098825217341073, "epoch": 2.2123067949839603, "grad_norm": 0.4685298204421997, "learning_rate": 1e-06, "loss": -0.0521, "step": 947 }, { "clip_ratio/high_max": 0.0028901178593514487, "clip_ratio/high_mean": 0.000974933267571032, "clip_ratio/low_mean": 0.0010005767235270469, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019755099710891955, "epoch": 2.214639836687081, "grad_norm": 0.28919854760169983, "learning_rate": 1e-06, "loss": -0.0523, "step": 948 }, { "clip_ratio/high_max": 0.002647184010129422, "clip_ratio/high_mean": 0.0008910328178899363, "clip_ratio/low_mean": 0.0009205573060171446, "clip_ratio/low_min": 3.203485539415851e-05, "clip_ratio/region_mean": 0.001811590147553943, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 1091.618408203125, "completions/mean_terminated_length": 658.0357666015625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 2.216972878390201, "grad_norm": 0.4784538745880127, "learning_rate": 1e-06, "loss": -0.0364, "num_tokens": 146233937.0, "reward": 0.5066964626312256, "reward_std": 0.15357083082199097, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 949 }, { "clip_ratio/high_max": 0.002538186148740351, "clip_ratio/high_mean": 0.00091569724099827, "clip_ratio/low_mean": 0.0011455879393906798, "clip_ratio/low_min": 3.203485539415851e-05, "clip_ratio/region_mean": 0.0020612851440091617, "epoch": 2.219305920093322, "grad_norm": 0.28302744030952454, "learning_rate": 1e-06, "loss": -0.0366, "step": 950 }, { "clip_ratio/high_max": 0.00278662553319009, "clip_ratio/high_mean": 0.0009919084350258345, "clip_ratio/low_mean": 0.0012398492199281463, "clip_ratio/low_min": 7.089181599440053e-05, "clip_ratio/region_mean": 0.0022317576585919596, "epoch": 2.221638961796442, "grad_norm": 0.3054825961589813, "learning_rate": 1e-06, "loss": -0.0368, "step": 951 }, { "clip_ratio/high_max": 0.0029278878319018986, "clip_ratio/high_mean": 0.0009694402524473844, "clip_ratio/low_mean": 0.00148083772364771, "clip_ratio/low_min": 5.358674025046639e-05, "clip_ratio/region_mean": 0.002450278014293872, "epoch": 2.2239720034995627, "grad_norm": 0.27107951045036316, "learning_rate": 1e-06, "loss": -0.0368, "step": 952 }, { "clip_ratio/high_max": 0.002904356333601754, "clip_ratio/high_mean": 0.0010795839843922295, "clip_ratio/low_mean": 0.0006849651490483666, "clip_ratio/low_min": 4.1184441215591505e-05, "clip_ratio/region_mean": 0.001764549124345649, "completions/clipped_ratio": 0.21875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3147.0, "completions/mean_length": 1347.3148193359375, "completions/mean_terminated_length": 577.682861328125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.226305045202683, "grad_norm": 0.4037938714027405, "learning_rate": 1e-06, "loss": -0.071, "num_tokens": 146729507.0, "reward": 0.5379464626312256, "reward_std": 0.16285626590251923, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 953 }, { "clip_ratio/high_max": 0.0034089058317476884, "clip_ratio/high_mean": 0.0012039217726851348, "clip_ratio/low_mean": 0.0009460440851398744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021499658323591575, "epoch": 2.2286380869058036, "grad_norm": 0.485542356967926, "learning_rate": 1e-06, "loss": -0.0713, "step": 954 }, { "clip_ratio/high_max": 0.0033412473276257515, "clip_ratio/high_mean": 0.0012357542582321912, "clip_ratio/low_mean": 0.0012359814281808212, "clip_ratio/low_min": 8.362840890185907e-05, "clip_ratio/region_mean": 0.002471735657309182, "epoch": 2.2309711286089238, "grad_norm": 0.366385281085968, "learning_rate": 1e-06, "loss": -0.0715, "step": 955 }, { "clip_ratio/high_max": 0.0029538357484852895, "clip_ratio/high_mean": 0.001137475997893489, "clip_ratio/low_mean": 0.0013681426498806104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002505618664145004, "epoch": 2.2333041703120444, "grad_norm": 0.3081059455871582, "learning_rate": 1e-06, "loss": -0.0716, "step": 956 }, { "clip_ratio/high_max": 0.0030130940431263298, "clip_ratio/high_mean": 0.0010497359544388019, "clip_ratio/low_mean": 0.0005339666868167114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015837026585359126, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3966.0, "completions/mean_length": 1116.4921875, "completions/mean_terminated_length": 574.048828125, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 2.2356372120151646, "grad_norm": 5.766878604888916, "learning_rate": 1e-06, "loss": -0.0674, "num_tokens": 147239780.0, "reward": 0.613839328289032, "reward_std": 0.16293182969093323, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 957 }, { "clip_ratio/high_max": 0.003012447865330614, "clip_ratio/high_mean": 0.0010760296172520611, "clip_ratio/low_mean": 0.000620303044343018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016963326306722593, "epoch": 2.2379702537182853, "grad_norm": 0.3486430048942566, "learning_rate": 1e-06, "loss": -0.0678, "step": 958 }, { "clip_ratio/high_max": 0.002943480671092402, "clip_ratio/high_mean": 0.00110754117849865, "clip_ratio/low_mean": 0.0007640083949809195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018715495971264318, "epoch": 2.2403032954214055, "grad_norm": 0.271214097738266, "learning_rate": 1e-06, "loss": -0.068, "step": 959 }, { "clip_ratio/high_max": 0.0031931614503264427, "clip_ratio/high_mean": 0.0012021760194329545, "clip_ratio/low_mean": 0.0008246758879977278, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002026851878326852, "epoch": 2.242636337124526, "grad_norm": 0.3531985580921173, "learning_rate": 1e-06, "loss": -0.0681, "step": 960 }, { "clip_ratio/high_max": 0.002664045139681548, "clip_ratio/high_mean": 0.0010058504012704361, "clip_ratio/low_mean": 0.0005185484935736895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015243989109876566, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 1282.4342041015625, "completions/mean_terminated_length": 633.1497192382812, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.2449693788276464, "grad_norm": 0.40163454413414, "learning_rate": 1e-06, "loss": -0.0783, "num_tokens": 147789193.0, "reward": 0.5691964626312256, "reward_std": 0.16559043526649475, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 961 }, { "clip_ratio/high_max": 0.0032124888748512603, "clip_ratio/high_mean": 0.0011966880301770288, "clip_ratio/low_mean": 0.0006262310209876887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018229190609417856, "epoch": 2.247302420530767, "grad_norm": 0.45577675104141235, "learning_rate": 1e-06, "loss": -0.0784, "step": 962 }, { "clip_ratio/high_max": 0.0030224134898162447, "clip_ratio/high_mean": 0.0011884515406563878, "clip_ratio/low_mean": 0.0008031367756302643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001991588324017357, "epoch": 2.249635462233887, "grad_norm": 0.320630818605423, "learning_rate": 1e-06, "loss": -0.0787, "step": 963 }, { "clip_ratio/high_max": 0.002840099848981481, "clip_ratio/high_mean": 0.0010832173866219819, "clip_ratio/low_mean": 0.0008867497515439027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019699671465787105, "epoch": 2.251968503937008, "grad_norm": 0.24168018996715546, "learning_rate": 1e-06, "loss": -0.0788, "step": 964 }, { "clip_ratio/high_max": 0.002357761361054145, "clip_ratio/high_mean": 0.0010247818900097627, "clip_ratio/low_mean": 0.00043577770338742994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001460559604311129, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 1122.5390625, "completions/mean_terminated_length": 653.85400390625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.2543015456401285, "grad_norm": 2.06417179107666, "learning_rate": 1e-06, "loss": -0.0767, "num_tokens": 148375244.0, "reward": 0.6160714626312256, "reward_std": 0.18426863849163055, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 965 }, { "clip_ratio/high_max": 0.002831560886988882, "clip_ratio/high_mean": 0.0011119983173557557, "clip_ratio/low_mean": 0.0005349792563720257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016469775946461596, "epoch": 2.2566345873432487, "grad_norm": 0.46656858921051025, "learning_rate": 1e-06, "loss": -0.077, "step": 966 }, { "clip_ratio/high_max": 0.0030432470302912407, "clip_ratio/high_mean": 0.001229013429110637, "clip_ratio/low_mean": 0.0006839412080807961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019129546199110337, "epoch": 2.2589676290463694, "grad_norm": 0.8655084371566772, "learning_rate": 1e-06, "loss": -0.0771, "step": 967 }, { "clip_ratio/high_max": 0.0031764406012371182, "clip_ratio/high_mean": 0.0012684529901889618, "clip_ratio/low_mean": 0.0008798869198471948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021483399614226073, "epoch": 2.2613006707494896, "grad_norm": 0.3657401204109192, "learning_rate": 1e-06, "loss": -0.0773, "step": 968 }, { "clip_ratio/high_max": 0.002323424552741926, "clip_ratio/high_mean": 0.0008455383376713144, "clip_ratio/low_mean": 0.0006227892163224169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014683275403513107, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 1194.97998046875, "completions/mean_terminated_length": 602.2984008789062, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 2.2636337124526102, "grad_norm": 17.103626251220703, "learning_rate": 1e-06, "loss": -0.0812, "num_tokens": 148909010.0, "reward": 0.5479910969734192, "reward_std": 0.14872092008590698, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 969 }, { "clip_ratio/high_max": 0.002081017133605201, "clip_ratio/high_mean": 0.0007667378240512335, "clip_ratio/low_mean": 0.0007187807577793137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014855186236673035, "epoch": 2.2659667541557305, "grad_norm": 0.34251248836517334, "learning_rate": 1e-06, "loss": -0.0821, "step": 970 }, { "clip_ratio/high_max": 0.0027674153679981828, "clip_ratio/high_mean": 0.0010329963461117586, "clip_ratio/low_mean": 0.0008870068504620576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019200031965738162, "epoch": 2.268299795858851, "grad_norm": 1.7405011653900146, "learning_rate": 1e-06, "loss": -0.0819, "step": 971 }, { "clip_ratio/high_max": 0.0023676205819356255, "clip_ratio/high_mean": 0.0009199112373607932, "clip_ratio/low_mean": 0.00092361876340874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018435300153214484, "epoch": 2.2706328375619713, "grad_norm": 0.24375136196613312, "learning_rate": 1e-06, "loss": -0.0824, "step": 972 }, { "clip_ratio/high_max": 0.002068912253889721, "clip_ratio/high_mean": 0.000693252088240115, "clip_ratio/low_mean": 0.000536447962076636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001229700032126857, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 1095.055908203125, "completions/mean_terminated_length": 590.333740234375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.272965879265092, "grad_norm": 0.48935800790786743, "learning_rate": 1e-06, "loss": -0.0347, "num_tokens": 149438860.0, "reward": 0.5948660969734192, "reward_std": 0.11779557168483734, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 973 }, { "clip_ratio/high_max": 0.0027307323762215674, "clip_ratio/high_mean": 0.0008761845056142192, "clip_ratio/low_mean": 0.0006759881471225526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015521726563747507, "epoch": 2.275298920968212, "grad_norm": 0.29336729645729065, "learning_rate": 1e-06, "loss": -0.0349, "step": 974 }, { "clip_ratio/high_max": 0.0025528133628540672, "clip_ratio/high_mean": 0.0009161385969491675, "clip_ratio/low_mean": 0.0007209028472061618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016370414559787605, "epoch": 2.277631962671333, "grad_norm": 0.2594343423843384, "learning_rate": 1e-06, "loss": -0.0351, "step": 975 }, { "clip_ratio/high_max": 0.0023242650349857286, "clip_ratio/high_mean": 0.0008347724251507316, "clip_ratio/low_mean": 0.0008770580143391271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017118304604082368, "epoch": 2.279965004374453, "grad_norm": 0.22170796990394592, "learning_rate": 1e-06, "loss": -0.0352, "step": 976 }, { "clip_ratio/high_max": 0.002100643720041262, "clip_ratio/high_mean": 0.0007776752008794574, "clip_ratio/low_mean": 0.00032928108203122974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011069562769989716, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3560.0, "completions/mean_length": 1214.583740234375, "completions/mean_terminated_length": 644.4639282226562, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.2822980460775737, "grad_norm": 0.3608812093734741, "learning_rate": 1e-06, "loss": -0.0723, "num_tokens": 150004175.0, "reward": 0.582589328289032, "reward_std": 0.13553200662136078, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 977 }, { "clip_ratio/high_max": 0.0033692896031425335, "clip_ratio/high_mean": 0.0011336614516039845, "clip_ratio/low_mean": 0.0004130007951061998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015466622498934157, "epoch": 2.284631087780694, "grad_norm": 0.24165305495262146, "learning_rate": 1e-06, "loss": -0.0725, "step": 978 }, { "clip_ratio/high_max": 0.0030390513129532337, "clip_ratio/high_mean": 0.0010545091008680174, "clip_ratio/low_mean": 0.0005393398009800876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015938489304971881, "epoch": 2.2869641294838146, "grad_norm": 0.2558513581752777, "learning_rate": 1e-06, "loss": -0.0726, "step": 979 }, { "clip_ratio/high_max": 0.0028472500598581973, "clip_ratio/high_mean": 0.0010373736895417096, "clip_ratio/low_mean": 0.0006035095366314636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016408832379966043, "epoch": 2.289297171186935, "grad_norm": 0.17653292417526245, "learning_rate": 1e-06, "loss": -0.0727, "step": 980 }, { "clip_ratio/high_max": 0.0028249504175619222, "clip_ratio/high_mean": 0.001207280625749263, "clip_ratio/low_mean": 0.0008128016497721546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020200823128107004, "completions/clipped_ratio": 0.1986607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3240.0, "completions/mean_length": 1342.474365234375, "completions/mean_terminated_length": 659.8453979492188, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 2.2916302128900554, "grad_norm": 0.3932231366634369, "learning_rate": 1e-06, "loss": -0.0669, "num_tokens": 150570936.0, "reward": 0.515625, "reward_std": 0.19643910229206085, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 981 }, { "clip_ratio/high_max": 0.0034591210351209156, "clip_ratio/high_mean": 0.0014120001178525854, "clip_ratio/low_mean": 0.0011471635571069783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002559163658588659, "epoch": 2.2939632545931756, "grad_norm": 0.33816421031951904, "learning_rate": 1e-06, "loss": -0.0672, "step": 982 }, { "clip_ratio/high_max": 0.003579712829377968, "clip_ratio/high_mean": 0.0014195802650647238, "clip_ratio/low_mean": 0.001232762911968166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026523431224632077, "epoch": 2.2962962962962963, "grad_norm": 4.218838691711426, "learning_rate": 1e-06, "loss": -0.0671, "step": 983 }, { "clip_ratio/high_max": 0.0032868889975361526, "clip_ratio/high_mean": 0.0013357055995584233, "clip_ratio/low_mean": 0.001358836498184246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026945421268464997, "epoch": 2.298629337999417, "grad_norm": 0.35733336210250854, "learning_rate": 1e-06, "loss": -0.0673, "step": 984 }, { "clip_ratio/high_max": 0.003029476327355951, "clip_ratio/high_mean": 0.001146598151535727, "clip_ratio/low_mean": 0.0005053874992881902, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016519856653758325, "completions/clipped_ratio": 0.2310267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 1428.454345703125, "completions/mean_terminated_length": 627.029052734375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.300962379702537, "grad_norm": 0.4142170250415802, "learning_rate": 1e-06, "loss": -0.0879, "num_tokens": 151098647.0, "reward": 0.494419664144516, "reward_std": 0.16142338514328003, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 985 }, { "clip_ratio/high_max": 0.0031548295373795554, "clip_ratio/high_mean": 0.0011314827770547708, "clip_ratio/low_mean": 0.0006934225211807643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018249052845931146, "epoch": 2.303295421405658, "grad_norm": 0.5481168627738953, "learning_rate": 1e-06, "loss": -0.0882, "step": 986 }, { "clip_ratio/high_max": 0.0031549265841022134, "clip_ratio/high_mean": 0.0012335540013737045, "clip_ratio/low_mean": 0.0007629910346622637, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019965450846939348, "epoch": 2.305628463108778, "grad_norm": 140.38577270507812, "learning_rate": 1e-06, "loss": -0.0821, "step": 987 }, { "clip_ratio/high_max": 0.0027597696243901737, "clip_ratio/high_mean": 0.0010273685693391599, "clip_ratio/low_mean": 0.0008408467556364485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001868215338618029, "epoch": 2.3079615048118987, "grad_norm": 0.9917909502983093, "learning_rate": 1e-06, "loss": -0.0881, "step": 988 }, { "clip_ratio/high_max": 0.003028048544365447, "clip_ratio/high_mean": 0.001081721464288421, "clip_ratio/low_mean": 0.0007683532085138722, "clip_ratio/low_min": 4.163161611359101e-05, "clip_ratio/region_mean": 0.001850074673711788, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 1246.032470703125, "completions/mean_terminated_length": 626.4741821289062, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.310294546515019, "grad_norm": 0.40778112411499023, "learning_rate": 1e-06, "loss": -0.0924, "num_tokens": 151650732.0, "reward": 0.5089285969734192, "reward_std": 0.19392497837543488, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 989 }, { "clip_ratio/high_max": 0.0034053331764880568, "clip_ratio/high_mean": 0.001328769045358058, "clip_ratio/low_mean": 0.0009688647942311945, "clip_ratio/low_min": 5.413161579781445e-05, "clip_ratio/region_mean": 0.0022976338368607685, "epoch": 2.3126275882181395, "grad_norm": 0.3833784759044647, "learning_rate": 1e-06, "loss": -0.0928, "step": 990 }, { "clip_ratio/high_max": 0.0033171465183841065, "clip_ratio/high_mean": 0.0012629199190996587, "clip_ratio/low_mean": 0.0011690571373037528, "clip_ratio/low_min": 0.00014621676746173762, "clip_ratio/region_mean": 0.0024319771546288393, "epoch": 2.3149606299212597, "grad_norm": 0.3633912205696106, "learning_rate": 1e-06, "loss": -0.0929, "step": 991 }, { "clip_ratio/high_max": 0.0034121552744181827, "clip_ratio/high_mean": 0.0012064853326592129, "clip_ratio/low_mean": 0.0014180677972035483, "clip_ratio/low_min": 9.379379298479762e-05, "clip_ratio/region_mean": 0.0026245531262247823, "epoch": 2.3172936716243804, "grad_norm": 0.3005678355693817, "learning_rate": 1e-06, "loss": -0.093, "step": 992 }, { "clip_ratio/high_max": 0.002488452875695657, "clip_ratio/high_mean": 0.0008932439122872893, "clip_ratio/low_mean": 0.0005184338533581467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001411677738360595, "completions/clipped_ratio": 0.1908482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3472.0, "completions/mean_length": 1281.1328125, "completions/mean_terminated_length": 617.21240234375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.3196267133275006, "grad_norm": 0.3225542902946472, "learning_rate": 1e-06, "loss": -0.0759, "num_tokens": 152188867.0, "reward": 0.543526828289032, "reward_std": 0.16931875050067902, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 993 }, { "clip_ratio/high_max": 0.0033031440470949747, "clip_ratio/high_mean": 0.001110712513764156, "clip_ratio/low_mean": 0.0006985536992942798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018092661994160153, "epoch": 2.3219597550306212, "grad_norm": 0.4043016731739044, "learning_rate": 1e-06, "loss": -0.0761, "step": 994 }, { "clip_ratio/high_max": 0.003438280677073635, "clip_ratio/high_mean": 0.0011509969881444704, "clip_ratio/low_mean": 0.0008118735868265503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019628705485956743, "epoch": 2.3242927967337415, "grad_norm": 0.4648806154727936, "learning_rate": 1e-06, "loss": -0.0762, "step": 995 }, { "clip_ratio/high_max": 0.003114584367722273, "clip_ratio/high_mean": 0.0010607460390019696, "clip_ratio/low_mean": 0.0010222924975096248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00208303848921787, "epoch": 2.326625838436862, "grad_norm": 0.26296645402908325, "learning_rate": 1e-06, "loss": -0.0764, "step": 996 }, { "clip_ratio/high_max": 0.002417671727016568, "clip_ratio/high_mean": 0.0009193738933390705, "clip_ratio/low_mean": 0.000763972569075122, "clip_ratio/low_min": 3.9729837226332165e-05, "clip_ratio/region_mean": 0.0016833464505907614, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 1117.6038818359375, "completions/mean_terminated_length": 643.6804809570312, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 2.3289588801399823, "grad_norm": 0.4194320738315582, "learning_rate": 1e-06, "loss": -0.0345, "num_tokens": 152769496.0, "reward": 0.5770089626312256, "reward_std": 0.1735171675682068, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 997 }, { "clip_ratio/high_max": 0.0024179109277611133, "clip_ratio/high_mean": 0.0009443969120184192, "clip_ratio/low_mean": 0.000816002096144075, "clip_ratio/low_min": 5.6918228438007645e-05, "clip_ratio/region_mean": 0.0017603990345378406, "epoch": 2.331291921843103, "grad_norm": 3.264232635498047, "learning_rate": 1e-06, "loss": -0.0345, "step": 998 }, { "clip_ratio/high_max": 0.0025948119946406223, "clip_ratio/high_mean": 0.0009935917623806745, "clip_ratio/low_mean": 0.0008966809964476852, "clip_ratio/low_min": 2.4831148039083928e-05, "clip_ratio/region_mean": 0.0018902727169916034, "epoch": 2.3336249635462236, "grad_norm": 0.9257593750953674, "learning_rate": 1e-06, "loss": -0.0348, "step": 999 }, { "clip_ratio/high_max": 0.0028997113186051138, "clip_ratio/high_mean": 0.0010146557060579653, "clip_ratio/low_mean": 0.001037215364704025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020518710553005803, "epoch": 2.335958005249344, "grad_norm": 0.2971794009208679, "learning_rate": 1e-06, "loss": -0.035, "step": 1000 }, { "clip_ratio/high_max": 0.002549213862948818, "clip_ratio/high_mean": 0.0010256611130898818, "clip_ratio/low_mean": 0.0005741518498325604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001599812942004064, "completions/clipped_ratio": 0.1975446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 1316.390625, "completions/mean_terminated_length": 632.11962890625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 2.338291046952464, "grad_norm": 0.42819470167160034, "learning_rate": 1e-06, "loss": -0.0633, "num_tokens": 153315830.0, "reward": 0.535714328289032, "reward_std": 0.17194482684135437, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1001 }, { "clip_ratio/high_max": 0.002779672657197807, "clip_ratio/high_mean": 0.001210143745993264, "clip_ratio/low_mean": 0.000770265960454708, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001980409688258078, "epoch": 2.3406240886555847, "grad_norm": 0.5305830240249634, "learning_rate": 1e-06, "loss": -0.0636, "step": 1002 }, { "clip_ratio/high_max": 0.003190549647115404, "clip_ratio/high_mean": 0.0012680522759183077, "clip_ratio/low_mean": 0.0009072630491573364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002175315334170591, "epoch": 2.3429571303587053, "grad_norm": 0.26097580790519714, "learning_rate": 1e-06, "loss": -0.0637, "step": 1003 }, { "clip_ratio/high_max": 0.003076888679061085, "clip_ratio/high_mean": 0.0012832308166252915, "clip_ratio/low_mean": 0.0011237022426939802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024069330756901763, "epoch": 2.3452901720618256, "grad_norm": 0.2689996361732483, "learning_rate": 1e-06, "loss": -0.0639, "step": 1004 }, { "clip_ratio/high_max": 0.0032212520818575285, "clip_ratio/high_mean": 0.001396600529915304, "clip_ratio/low_mean": 0.0007647566353625734, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021613571589114144, "completions/clipped_ratio": 0.1908482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3927.0, "completions/mean_length": 1290.0379638671875, "completions/mean_terminated_length": 628.2179565429688, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 2.347623213764946, "grad_norm": 0.6441253423690796, "learning_rate": 1e-06, "loss": -0.0758, "num_tokens": 153856616.0, "reward": 0.5736607313156128, "reward_std": 0.20475134253501892, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 1005 }, { "clip_ratio/high_max": 0.0038764553028158844, "clip_ratio/high_mean": 0.0016574255751038436, "clip_ratio/low_mean": 0.0011169544559379574, "clip_ratio/low_min": 1.6551906810491346e-05, "clip_ratio/region_mean": 0.002774380009213928, "epoch": 2.3499562554680664, "grad_norm": 0.3867473900318146, "learning_rate": 1e-06, "loss": -0.0761, "step": 1006 }, { "clip_ratio/high_max": 0.003760096507903654, "clip_ratio/high_mean": 0.0016148477443493903, "clip_ratio/low_mean": 0.0012356840088614263, "clip_ratio/low_min": 1.2395874364301562e-05, "clip_ratio/region_mean": 0.0028505318259703927, "epoch": 2.352289297171187, "grad_norm": 0.31367841362953186, "learning_rate": 1e-06, "loss": -0.0763, "step": 1007 }, { "clip_ratio/high_max": 0.0034999904601136222, "clip_ratio/high_mean": 0.0015886338678683387, "clip_ratio/low_mean": 0.0013848905728082173, "clip_ratio/low_min": 2.4791748728603125e-05, "clip_ratio/region_mean": 0.0029735244752373546, "epoch": 2.3546223388743073, "grad_norm": 0.464984267950058, "learning_rate": 1e-06, "loss": -0.0764, "step": 1008 }, { "clip_ratio/high_max": 0.0023474985573557205, "clip_ratio/high_mean": 0.0008604757367720595, "clip_ratio/low_mean": 0.0004965547950632754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013570305563916918, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 1207.3482666015625, "completions/mean_terminated_length": 631.1646728515625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.356955380577428, "grad_norm": 0.47724905610084534, "learning_rate": 1e-06, "loss": -0.0625, "num_tokens": 154413232.0, "reward": 0.5814732313156128, "reward_std": 0.1616523265838623, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1009 }, { "clip_ratio/high_max": 0.0025090486378758214, "clip_ratio/high_mean": 0.0009497363480477361, "clip_ratio/low_mean": 0.0006324056093944819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001582142009283416, "epoch": 2.359288422280548, "grad_norm": 0.39060601592063904, "learning_rate": 1e-06, "loss": -0.0628, "step": 1010 }, { "clip_ratio/high_max": 0.0027713703966583125, "clip_ratio/high_mean": 0.0010154219253308838, "clip_ratio/low_mean": 0.0008564775189370266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018718994542723522, "epoch": 2.361621463983669, "grad_norm": 1.431813359260559, "learning_rate": 1e-06, "loss": -0.0628, "step": 1011 }, { "clip_ratio/high_max": 0.0025124282619799487, "clip_ratio/high_mean": 0.0008822830150165828, "clip_ratio/low_mean": 0.0008730381450732239, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017553211364429444, "epoch": 2.363954505686789, "grad_norm": 0.2552565336227417, "learning_rate": 1e-06, "loss": -0.0629, "step": 1012 }, { "clip_ratio/high_max": 0.002343948326597456, "clip_ratio/high_mean": 0.0008589890730945626, "clip_ratio/low_mean": 0.0007976667329785414, "clip_ratio/low_min": 6.340220170386601e-05, "clip_ratio/region_mean": 0.0016566557751502842, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3909.0, "completions/mean_length": 1272.2701416015625, "completions/mean_terminated_length": 713.564208984375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 2.3662875473899097, "grad_norm": 1.2154748439788818, "learning_rate": 1e-06, "loss": -0.0259, "num_tokens": 155029434.0, "reward": 0.486607164144516, "reward_std": 0.18565760552883148, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1013 }, { "clip_ratio/high_max": 0.003020567608473357, "clip_ratio/high_mean": 0.0010814763973030495, "clip_ratio/low_mean": 0.0010071384713228326, "clip_ratio/low_min": 8.075889036263106e-05, "clip_ratio/region_mean": 0.002088614863168914, "epoch": 2.36862058909303, "grad_norm": 0.8156465291976929, "learning_rate": 1e-06, "loss": -0.026, "step": 1014 }, { "clip_ratio/high_max": 0.0025441844045417383, "clip_ratio/high_mean": 0.0009865956535577425, "clip_ratio/low_mean": 0.0011564873020688538, "clip_ratio/low_min": 5.785310258943355e-05, "clip_ratio/region_mean": 0.0021430829947348684, "epoch": 2.3709536307961505, "grad_norm": 0.3282146453857422, "learning_rate": 1e-06, "loss": -0.0262, "step": 1015 }, { "clip_ratio/high_max": 0.002688372573175002, "clip_ratio/high_mean": 0.0010582581526250578, "clip_ratio/low_mean": 0.0013359894837776665, "clip_ratio/low_min": 9.320175740867853e-05, "clip_ratio/region_mean": 0.0023942476473166607, "epoch": 2.3732866724992707, "grad_norm": 0.4723837971687317, "learning_rate": 1e-06, "loss": -0.0265, "step": 1016 }, { "clip_ratio/high_max": 0.0023975085860001855, "clip_ratio/high_mean": 0.0009403131052749814, "clip_ratio/low_mean": 0.0008441852114629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017844982940005139, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2385.0, "completions/mean_length": 1143.44873046875, "completions/mean_terminated_length": 624.2335815429688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.3756197142023914, "grad_norm": 1.1150003671646118, "learning_rate": 1e-06, "loss": -0.0684, "num_tokens": 155590132.0, "reward": 0.5870535969734192, "reward_std": 0.17356246709823608, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 1017 }, { "clip_ratio/high_max": 0.0023746612641843967, "clip_ratio/high_mean": 0.0010519171428313712, "clip_ratio/low_mean": 0.0010436583052069182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020955754444003105, "epoch": 2.377952755905512, "grad_norm": 0.35146912932395935, "learning_rate": 1e-06, "loss": -0.0689, "step": 1018 }, { "clip_ratio/high_max": 0.002574287042079959, "clip_ratio/high_mean": 0.001095955807613791, "clip_ratio/low_mean": 0.0010886510608543176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021846068630111404, "epoch": 2.3802857976086322, "grad_norm": 0.566755473613739, "learning_rate": 1e-06, "loss": -0.0691, "step": 1019 }, { "clip_ratio/high_max": 0.002869894298783038, "clip_ratio/high_mean": 0.0011550411818461725, "clip_ratio/low_mean": 0.0013004776719753863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024555188283557072, "epoch": 2.382618839311753, "grad_norm": 0.35496124625205994, "learning_rate": 1e-06, "loss": -0.0691, "step": 1020 }, { "clip_ratio/high_max": 0.002378178407525411, "clip_ratio/high_mean": 0.0008765605271037202, "clip_ratio/low_mean": 0.0006781857773603406, "clip_ratio/low_min": 3.869170541292988e-05, "clip_ratio/region_mean": 0.0015547462899121456, "completions/clipped_ratio": 0.1953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3543.0, "completions/mean_length": 1372.7020263671875, "completions/mean_terminated_length": 711.7073974609375, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 2.384951881014873, "grad_norm": 0.31459930539131165, "learning_rate": 1e-06, "loss": -0.0627, "num_tokens": 156196969.0, "reward": 0.504464328289032, "reward_std": 0.16304031014442444, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 1021 }, { "clip_ratio/high_max": 0.002528465338400565, "clip_ratio/high_mean": 0.0009805062491068384, "clip_ratio/low_mean": 0.0008077849870460341, "clip_ratio/low_min": 3.859215939883143e-05, "clip_ratio/region_mean": 0.0017882912361528724, "epoch": 2.3872849227179938, "grad_norm": 1.2926280498504639, "learning_rate": 1e-06, "loss": -0.0369, "step": 1022 }, { "clip_ratio/high_max": 0.0028573572417371906, "clip_ratio/high_mean": 0.001060835213138489, "clip_ratio/low_mean": 0.0009208857263729442, "clip_ratio/low_min": 3.869170541292988e-05, "clip_ratio/region_mean": 0.0019817209540633485, "epoch": 2.389617964421114, "grad_norm": 0.334761381149292, "learning_rate": 1e-06, "loss": -0.0627, "step": 1023 }, { "clip_ratio/high_max": 0.002693070753593929, "clip_ratio/high_mean": 0.0009774712307262234, "clip_ratio/low_mean": 0.001092622711439617, "clip_ratio/low_min": 5.158894055057317e-05, "clip_ratio/region_mean": 0.00207009391306201, "epoch": 2.3919510061242346, "grad_norm": 0.23134131729602814, "learning_rate": 1e-06, "loss": -0.0631, "step": 1024 }, { "clip_ratio/high_max": 0.003360801412782166, "clip_ratio/high_mean": 0.0012678354578383733, "clip_ratio/low_mean": 0.0007914999741842621, "clip_ratio/low_min": 2.810251862683799e-05, "clip_ratio/region_mean": 0.002059335405647289, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3753.0, "completions/mean_length": 1202.4498291015625, "completions/mean_terminated_length": 648.3656616210938, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.394284047827355, "grad_norm": 0.5461203455924988, "learning_rate": 1e-06, "loss": -0.0632, "num_tokens": 156773052.0, "reward": 0.5546875, "reward_std": 0.1937377154827118, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1025 }, { "clip_ratio/high_max": 0.003225434171326924, "clip_ratio/high_mean": 0.0013231091033958364, "clip_ratio/low_mean": 0.0009718003557281918, "clip_ratio/low_min": 6.196877802722156e-05, "clip_ratio/region_mean": 0.002294909361808095, "epoch": 2.3966170895304755, "grad_norm": 2.4409067630767822, "learning_rate": 1e-06, "loss": -0.0631, "step": 1026 }, { "clip_ratio/high_max": 0.0034342590224696323, "clip_ratio/high_mean": 0.0013042530481470749, "clip_ratio/low_mean": 0.0010976359981214046, "clip_ratio/low_min": 5.201493240747368e-05, "clip_ratio/region_mean": 0.0024018891126615927, "epoch": 2.3989501312335957, "grad_norm": 0.32167816162109375, "learning_rate": 1e-06, "loss": -0.0635, "step": 1027 }, { "clip_ratio/high_max": 0.003160504773404682, "clip_ratio/high_mean": 0.0012710246264759917, "clip_ratio/low_mean": 0.0012066720555594657, "clip_ratio/low_min": 1.4051259313418996e-05, "clip_ratio/region_mean": 0.0024776966602075845, "epoch": 2.4012831729367163, "grad_norm": 0.3319133222103119, "learning_rate": 1e-06, "loss": -0.0636, "step": 1028 }, { "clip_ratio/high_max": 0.002681492711417377, "clip_ratio/high_mean": 0.0011625248043856118, "clip_ratio/low_mean": 0.0006230980397958774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001785622866009362, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 1093.235595703125, "completions/mean_terminated_length": 619.9315185546875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.4036162146398365, "grad_norm": 0.35158082842826843, "learning_rate": 1e-06, "loss": -0.0781, "num_tokens": 157337783.0, "reward": 0.5792410969734192, "reward_std": 0.19618017971515656, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1029 }, { "clip_ratio/high_max": 0.002852615580195561, "clip_ratio/high_mean": 0.0013030403933953494, "clip_ratio/low_mean": 0.0007578601671411889, "clip_ratio/low_min": 1.1419696420489345e-05, "clip_ratio/region_mean": 0.0020609005878213793, "epoch": 2.405949256342957, "grad_norm": 0.4239773750305176, "learning_rate": 1e-06, "loss": -0.0783, "step": 1030 }, { "clip_ratio/high_max": 0.002983951329952106, "clip_ratio/high_mean": 0.0013848326561856084, "clip_ratio/low_mean": 0.0008447586624242831, "clip_ratio/low_min": 2.0028841390740126e-05, "clip_ratio/region_mean": 0.002229591271316167, "epoch": 2.4082822980460774, "grad_norm": 0.2894873321056366, "learning_rate": 1e-06, "loss": -0.0784, "step": 1031 }, { "clip_ratio/high_max": 0.002903019565565046, "clip_ratio/high_mean": 0.0012929901386087295, "clip_ratio/low_mean": 0.0010902491940214531, "clip_ratio/low_min": 2.283939284097869e-05, "clip_ratio/region_mean": 0.0023832392325857654, "epoch": 2.410615339749198, "grad_norm": 0.7873396873474121, "learning_rate": 1e-06, "loss": -0.0785, "step": 1032 }, { "clip_ratio/high_max": 0.002579146144853439, "clip_ratio/high_mean": 0.001098236272810027, "clip_ratio/low_mean": 0.0005881738543394022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001686410130787408, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 1190.7154541015625, "completions/mean_terminated_length": 638.9814453125, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 2.4129483814523183, "grad_norm": 0.37567490339279175, "learning_rate": 1e-06, "loss": -0.0719, "num_tokens": 157910472.0, "reward": 0.5714285969734192, "reward_std": 0.15405938029289246, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1033 }, { "clip_ratio/high_max": 0.0026781482229125686, "clip_ratio/high_mean": 0.001145971056757844, "clip_ratio/low_mean": 0.000630951346920483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017769223959476221, "epoch": 2.415281423155439, "grad_norm": 1.0642943382263184, "learning_rate": 1e-06, "loss": -0.0719, "step": 1034 }, { "clip_ratio/high_max": 0.002911971452704165, "clip_ratio/high_mean": 0.0012003284591628471, "clip_ratio/low_mean": 0.000815371890894312, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020157004037173465, "epoch": 2.417614464858559, "grad_norm": 1.6225624084472656, "learning_rate": 1e-06, "loss": -0.0722, "step": 1035 }, { "clip_ratio/high_max": 0.003114605446171481, "clip_ratio/high_mean": 0.001245665631358861, "clip_ratio/low_mean": 0.0007963241123434273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002041989726421889, "epoch": 2.41994750656168, "grad_norm": 0.22994910180568695, "learning_rate": 1e-06, "loss": -0.0723, "step": 1036 }, { "clip_ratio/high_max": 0.003108551260083914, "clip_ratio/high_mean": 0.001248619595571654, "clip_ratio/low_mean": 0.0007474551548511954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019960747595177963, "completions/clipped_ratio": 0.1908482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3938.0, "completions/mean_length": 1311.173095703125, "completions/mean_terminated_length": 654.3379516601562, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.4222805482648004, "grad_norm": 0.4473035931587219, "learning_rate": 1e-06, "loss": -0.0931, "num_tokens": 158476499.0, "reward": 0.4988839626312256, "reward_std": 0.19700363278388977, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1037 }, { "clip_ratio/high_max": 0.0034619336802279577, "clip_ratio/high_mean": 0.0014040771820873488, "clip_ratio/low_mean": 0.0010524353328946745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024565125568187796, "epoch": 2.4246135899679206, "grad_norm": 0.5201957821846008, "learning_rate": 1e-06, "loss": -0.0933, "step": 1038 }, { "clip_ratio/high_max": 0.0034965277736773714, "clip_ratio/high_mean": 0.0014138645565253682, "clip_ratio/low_mean": 0.001104110138840042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025179746517096646, "epoch": 2.4269466316710413, "grad_norm": 0.41464993357658386, "learning_rate": 1e-06, "loss": -0.0935, "step": 1039 }, { "clip_ratio/high_max": 0.0036577012433554046, "clip_ratio/high_mean": 0.001395674109517131, "clip_ratio/low_mean": 0.0013931827052147128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027888568220078014, "epoch": 2.4292796733741615, "grad_norm": 0.4467448890209198, "learning_rate": 1e-06, "loss": -0.0936, "step": 1040 }, { "clip_ratio/high_max": 0.002716258790314896, "clip_ratio/high_mean": 0.0010300570647814311, "clip_ratio/low_mean": 0.0006646649480899214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016947220065048896, "completions/clipped_ratio": 0.1919642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2748.0, "completions/mean_length": 1303.399658203125, "completions/mean_terminated_length": 639.964111328125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.431612715077282, "grad_norm": 6.749979019165039, "learning_rate": 1e-06, "loss": -0.083, "num_tokens": 159035113.0, "reward": 0.5212053656578064, "reward_std": 0.17389805614948273, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1041 }, { "clip_ratio/high_max": 0.0030562135434593074, "clip_ratio/high_mean": 0.0010819600938702933, "clip_ratio/low_mean": 0.0006037375142113888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016856975562404841, "epoch": 2.4339457567804024, "grad_norm": 0.6198983192443848, "learning_rate": 1e-06, "loss": -0.0833, "step": 1042 }, { "clip_ratio/high_max": 0.0029400190032902174, "clip_ratio/high_mean": 0.0010901350397034548, "clip_ratio/low_mean": 0.0007948751608637394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018850102278520353, "epoch": 2.436278798483523, "grad_norm": 0.4022597074508667, "learning_rate": 1e-06, "loss": -0.0834, "step": 1043 }, { "clip_ratio/high_max": 0.0030451500351773575, "clip_ratio/high_mean": 0.0012415694945957512, "clip_ratio/low_mean": 0.0009858591765805613, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022274286529864185, "epoch": 2.4386118401866432, "grad_norm": 1.0695329904556274, "learning_rate": 1e-06, "loss": -0.0836, "step": 1044 }, { "clip_ratio/high_max": 0.001854061039921362, "clip_ratio/high_mean": 0.0006105432494223351, "clip_ratio/low_mean": 0.0005817153887619497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011922586418222636, "completions/clipped_ratio": 0.1752232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3568.0, "completions/mean_length": 1243.3795166015625, "completions/mean_terminated_length": 637.3423461914062, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 2.440944881889764, "grad_norm": 0.34013548493385315, "learning_rate": 1e-06, "loss": -0.0571, "num_tokens": 159594949.0, "reward": 0.5078125, "reward_std": 0.13339446485042572, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1045 }, { "clip_ratio/high_max": 0.0024698038250789978, "clip_ratio/high_mean": 0.0007397484259854537, "clip_ratio/low_mean": 0.0007992378523340449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015389862819574773, "epoch": 2.443277923592884, "grad_norm": 0.3012835383415222, "learning_rate": 1e-06, "loss": -0.0572, "step": 1046 }, { "clip_ratio/high_max": 0.0020748900205944665, "clip_ratio/high_mean": 0.0006918650315128616, "clip_ratio/low_mean": 0.0008197399292839691, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015116049617063254, "epoch": 2.4456109652960047, "grad_norm": 0.2306586354970932, "learning_rate": 1e-06, "loss": -0.0573, "step": 1047 }, { "clip_ratio/high_max": 0.0024131105674314313, "clip_ratio/high_mean": 0.0007677185349166393, "clip_ratio/low_mean": 0.0010261081024509622, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017938266100827605, "epoch": 2.447944006999125, "grad_norm": 0.22289088368415833, "learning_rate": 1e-06, "loss": -0.0575, "step": 1048 }, { "clip_ratio/high_max": 0.0024569282795710023, "clip_ratio/high_mean": 0.0008644164054203429, "clip_ratio/low_mean": 0.0006574705339517095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015218869521049783, "completions/clipped_ratio": 0.2008928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 1302.841552734375, "completions/mean_terminated_length": 600.6508178710938, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 2.4502770487022456, "grad_norm": 1.1771725416183472, "learning_rate": 1e-06, "loss": -0.0935, "num_tokens": 160109823.0, "reward": 0.5558035969734192, "reward_std": 0.15003500878810883, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 1049 }, { "clip_ratio/high_max": 0.002881484826502856, "clip_ratio/high_mean": 0.0009982598330680048, "clip_ratio/low_mean": 0.0009318079519289313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019300677813589573, "epoch": 2.452610090405366, "grad_norm": 0.31274810433387756, "learning_rate": 1e-06, "loss": -0.0938, "step": 1050 }, { "clip_ratio/high_max": 0.002638650745211635, "clip_ratio/high_mean": 0.0009893849746731576, "clip_ratio/low_mean": 0.0010363648834754713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002025749839958735, "epoch": 2.4549431321084865, "grad_norm": 1.2951401472091675, "learning_rate": 1e-06, "loss": -0.0938, "step": 1051 }, { "clip_ratio/high_max": 0.002682248246856034, "clip_ratio/high_mean": 0.0009567232282279292, "clip_ratio/low_mean": 0.0011453966781118652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002102119913615752, "epoch": 2.457276173811607, "grad_norm": 14.844244003295898, "learning_rate": 1e-06, "loss": -0.0934, "step": 1052 }, { "clip_ratio/high_max": 0.0021853860016562976, "clip_ratio/high_mean": 0.000819423898064997, "clip_ratio/low_mean": 0.0006705536952722468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014899775887897704, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 1278.962158203125, "completions/mean_terminated_length": 643.1053466796875, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 2.4596092155147273, "grad_norm": 0.8149164319038391, "learning_rate": 1e-06, "loss": -0.0377, "num_tokens": 160674165.0, "reward": 0.5401785969734192, "reward_std": 0.1462061107158661, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 1053 }, { "clip_ratio/high_max": 0.002716302828048356, "clip_ratio/high_mean": 0.0009960636743926443, "clip_ratio/low_mean": 0.0008860328489390668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018820965487975627, "epoch": 2.4619422572178475, "grad_norm": 0.37720391154289246, "learning_rate": 1e-06, "loss": -0.0378, "step": 1054 }, { "clip_ratio/high_max": 0.0024373573614866473, "clip_ratio/high_mean": 0.0009141904156422243, "clip_ratio/low_mean": 0.0009445492923987331, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018587396771181375, "epoch": 2.464275298920968, "grad_norm": 0.2817417085170746, "learning_rate": 1e-06, "loss": -0.038, "step": 1055 }, { "clip_ratio/high_max": 0.0022038811875972897, "clip_ratio/high_mean": 0.000833103807963198, "clip_ratio/low_mean": 0.0011378309536667075, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001970934790733736, "epoch": 2.466608340624089, "grad_norm": 0.29444506764411926, "learning_rate": 1e-06, "loss": -0.0381, "step": 1056 }, { "clip_ratio/high_max": 0.0022828902656328864, "clip_ratio/high_mean": 0.0008722466136532603, "clip_ratio/low_mean": 0.0004847733216593042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001357019955321448, "completions/clipped_ratio": 0.1919642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 1313.7445068359375, "completions/mean_terminated_length": 652.7666015625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.468941382327209, "grad_norm": 0.32517901062965393, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 161235512.0, "reward": 0.5267857313156128, "reward_std": 0.1426345854997635, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608329772949, "step": 1057 }, { "clip_ratio/high_max": 0.002340142302273307, "clip_ratio/high_mean": 0.0008573714912927244, "clip_ratio/low_mean": 0.0006355787313623296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014929502358427271, "epoch": 2.4712744240303297, "grad_norm": 1.303302526473999, "learning_rate": 1e-06, "loss": -0.0577, "step": 1058 }, { "clip_ratio/high_max": 0.0025418412915314548, "clip_ratio/high_mean": 0.0008899557924451074, "clip_ratio/low_mean": 0.0007363103593434062, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016262661774817389, "epoch": 2.47360746573345, "grad_norm": 2.903837203979492, "learning_rate": 1e-06, "loss": -0.0577, "step": 1059 }, { "clip_ratio/high_max": 0.0022629995946772397, "clip_ratio/high_mean": 0.0007724389615759719, "clip_ratio/low_mean": 0.0007414091776354326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00151384813398181, "epoch": 2.4759405074365706, "grad_norm": 0.30199334025382996, "learning_rate": 1e-06, "loss": -0.0579, "step": 1060 }, { "clip_ratio/high_max": 0.003308682622446213, "clip_ratio/high_mean": 0.0013969617248221766, "clip_ratio/low_mean": 0.0006770832369511481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020740449617733248, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3194.0, "completions/mean_length": 1237.50341796875, "completions/mean_terminated_length": 616.091064453125, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 2.478273549139691, "grad_norm": 0.651075541973114, "learning_rate": 1e-06, "loss": -0.0702, "num_tokens": 161772299.0, "reward": 0.5837053656578064, "reward_std": 0.20644131302833557, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1061 }, { "clip_ratio/high_max": 0.0036067981491214596, "clip_ratio/high_mean": 0.001528293258161284, "clip_ratio/low_mean": 0.0008718824028619565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024001756464713253, "epoch": 2.4806065908428114, "grad_norm": 0.36767756938934326, "learning_rate": 1e-06, "loss": -0.0705, "step": 1062 }, { "clip_ratio/high_max": 0.003762575113796629, "clip_ratio/high_mean": 0.0015718592403572984, "clip_ratio/low_mean": 0.0009934969239111524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025653562261140905, "epoch": 2.4829396325459316, "grad_norm": 5.580252647399902, "learning_rate": 1e-06, "loss": -0.0703, "step": 1063 }, { "clip_ratio/high_max": 0.003088929552177433, "clip_ratio/high_mean": 0.0013996390080137644, "clip_ratio/low_mean": 0.001137098817707738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002536737811169587, "epoch": 2.4852726742490523, "grad_norm": 0.47244471311569214, "learning_rate": 1e-06, "loss": -0.0706, "step": 1064 }, { "clip_ratio/high_max": 0.002923132669820916, "clip_ratio/high_mean": 0.0010396727648185333, "clip_ratio/low_mean": 0.0006197584198162076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001659431178268278, "completions/clipped_ratio": 0.1774553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 1256.872802734375, "completions/mean_terminated_length": 644.3609619140625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 2.4876057159521725, "grad_norm": 0.6805027723312378, "learning_rate": 1e-06, "loss": -0.0878, "num_tokens": 162333401.0, "reward": 0.5558035969734192, "reward_std": 0.17032833397388458, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1065 }, { "clip_ratio/high_max": 0.0028623156213143375, "clip_ratio/high_mean": 0.0009378493468830129, "clip_ratio/low_mean": 0.0008115148575598141, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017493642153567635, "epoch": 2.489938757655293, "grad_norm": 0.2885662019252777, "learning_rate": 1e-06, "loss": -0.088, "step": 1066 }, { "clip_ratio/high_max": 0.003075960270507494, "clip_ratio/high_mean": 0.0010739747085608542, "clip_ratio/low_mean": 0.0008992060411401326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019731808060896583, "epoch": 2.4922717993584134, "grad_norm": 0.2675045430660248, "learning_rate": 1e-06, "loss": -0.0881, "step": 1067 }, { "clip_ratio/high_max": 0.0029786025334033184, "clip_ratio/high_mean": 0.0010591352111077867, "clip_ratio/low_mean": 0.0010710793358157389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002130214517819695, "epoch": 2.494604841061534, "grad_norm": 0.6113716959953308, "learning_rate": 1e-06, "loss": -0.0882, "step": 1068 }, { "clip_ratio/high_max": 0.002666201973624993, "clip_ratio/high_mean": 0.0010773547619464807, "clip_ratio/low_mean": 0.0007055895102894283, "clip_ratio/low_min": 3.6971308873035014e-05, "clip_ratio/region_mean": 0.0017829442549555097, "completions/clipped_ratio": 0.2310267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2766.0, "completions/mean_length": 1483.575927734375, "completions/mean_terminated_length": 698.711181640625, "completions/min_length": 202.0, "completions/min_terminated_length": 202.0, "epoch": 2.4969378827646542, "grad_norm": 3.3077614307403564, "learning_rate": 1e-06, "loss": -0.0918, "num_tokens": 162913789.0, "reward": 0.4899553656578064, "reward_std": 0.19565841555595398, "rewards/verify_math_reward/mean": 0.4899553656578064, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1069 }, { "clip_ratio/high_max": 0.003094484272878617, "clip_ratio/high_mean": 0.0011558727783267386, "clip_ratio/low_mean": 0.0006511584524560021, "clip_ratio/low_min": 1.3718174159293994e-05, "clip_ratio/region_mean": 0.0018070312507916242, "epoch": 2.499270924467775, "grad_norm": 0.6611237525939941, "learning_rate": 1e-06, "loss": -0.092, "step": 1070 }, { "clip_ratio/high_max": 0.0031754387309774756, "clip_ratio/high_mean": 0.0012789847096428275, "clip_ratio/low_mean": 0.0008848344032230671, "clip_ratio/low_min": 2.9790277039865032e-05, "clip_ratio/region_mean": 0.0021638190883095376, "epoch": 2.5016039661708955, "grad_norm": 0.3469637334346771, "learning_rate": 1e-06, "loss": -0.0924, "step": 1071 }, { "clip_ratio/high_max": 0.0034965145314345136, "clip_ratio/high_mean": 0.0012520961136033293, "clip_ratio/low_mean": 0.000996719882095931, "clip_ratio/low_min": 2.743634831858799e-05, "clip_ratio/region_mean": 0.002248815944767557, "epoch": 2.5039370078740157, "grad_norm": 0.2789199650287628, "learning_rate": 1e-06, "loss": -0.0925, "step": 1072 }, { "clip_ratio/high_max": 0.002522102789953351, "clip_ratio/high_mean": 0.0010346790204494027, "clip_ratio/low_mean": 0.0004573142405206454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014919932727934793, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3625.0, "completions/mean_length": 1233.044677734375, "completions/mean_terminated_length": 680.2769165039062, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.506270049577136, "grad_norm": 1.1945894956588745, "learning_rate": 1e-06, "loss": -0.0651, "num_tokens": 163509517.0, "reward": 0.5848214626312256, "reward_std": 0.1579635888338089, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1073 }, { "clip_ratio/high_max": 0.0027828948295791633, "clip_ratio/high_mean": 0.00107886926161882, "clip_ratio/low_mean": 0.0006147858521217131, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016936551328399219, "epoch": 2.5086030912802566, "grad_norm": 1.1623624563217163, "learning_rate": 1e-06, "loss": -0.0657, "step": 1074 }, { "clip_ratio/high_max": 0.00306050424114801, "clip_ratio/high_mean": 0.0011504346293804701, "clip_ratio/low_mean": 0.0007578861104775569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001908320773509331, "epoch": 2.5109361329833773, "grad_norm": 0.3030674457550049, "learning_rate": 1e-06, "loss": -0.0659, "step": 1075 }, { "clip_ratio/high_max": 0.0030204920476535335, "clip_ratio/high_mean": 0.0011199106447747909, "clip_ratio/low_mean": 0.0008408051253354643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001960715773748234, "epoch": 2.5132691746864975, "grad_norm": 0.3003896474838257, "learning_rate": 1e-06, "loss": -0.0659, "step": 1076 }, { "clip_ratio/high_max": 0.003104200070083607, "clip_ratio/high_mean": 0.0012098361839889549, "clip_ratio/low_mean": 0.0006270740132094943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018369102217548061, "completions/clipped_ratio": 0.2198660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3389.0, "completions/mean_length": 1428.1876220703125, "completions/mean_terminated_length": 676.3147583007812, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.515602216389618, "grad_norm": 0.3916563093662262, "learning_rate": 1e-06, "loss": -0.1166, "num_tokens": 164076037.0, "reward": 0.4877232313156128, "reward_std": 0.18626311421394348, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.500128448009491, "step": 1077 }, { "clip_ratio/high_max": 0.0033082060544984415, "clip_ratio/high_mean": 0.001241679819941055, "clip_ratio/low_mean": 0.0007198265504939627, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019615063720266335, "epoch": 2.5179352580927383, "grad_norm": 2.1258981227874756, "learning_rate": 1e-06, "loss": -0.1164, "step": 1078 }, { "clip_ratio/high_max": 0.0035955186394858174, "clip_ratio/high_mean": 0.0012513943383964943, "clip_ratio/low_mean": 0.0008822432778288203, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002133637601218652, "epoch": 2.520268299795859, "grad_norm": 3.1256260871887207, "learning_rate": 1e-06, "loss": -0.1166, "step": 1079 }, { "clip_ratio/high_max": 0.0029033242390141822, "clip_ratio/high_mean": 0.0011112984848296037, "clip_ratio/low_mean": 0.0008066580830927705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019179565861122683, "epoch": 2.522601341498979, "grad_norm": 0.6186057925224304, "learning_rate": 1e-06, "loss": -0.1164, "step": 1080 }, { "clip_ratio/high_max": 0.002214796626503812, "clip_ratio/high_mean": 0.000847441629957757, "clip_ratio/low_mean": 0.0007942430311231874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016416846810898278, "completions/clipped_ratio": 0.2366071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 1520.3929443359375, "completions/mean_terminated_length": 722.1052856445312, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.5249343832021, "grad_norm": 0.3437739312648773, "learning_rate": 1e-06, "loss": -0.092, "num_tokens": 164680957.0, "reward": 0.4564732313156128, "reward_std": 0.1738227903842926, "rewards/verify_math_reward/mean": 0.4564732015132904, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1081 }, { "clip_ratio/high_max": 0.0023681455713813193, "clip_ratio/high_mean": 0.0009445407904422609, "clip_ratio/low_mean": 0.0010262897758366307, "clip_ratio/low_min": 1.2378688552416861e-05, "clip_ratio/region_mean": 0.001970830599020701, "epoch": 2.52726742490522, "grad_norm": 0.2701478600502014, "learning_rate": 1e-06, "loss": -0.0923, "step": 1082 }, { "clip_ratio/high_max": 0.0025028549644048326, "clip_ratio/high_mean": 0.0009642882323532831, "clip_ratio/low_mean": 0.001056229040841572, "clip_ratio/low_min": 2.1047204427304678e-05, "clip_ratio/region_mean": 0.0020205172477290034, "epoch": 2.5296004666083407, "grad_norm": 3.0239999294281006, "learning_rate": 1e-06, "loss": -0.0922, "step": 1083 }, { "clip_ratio/high_max": 0.0024725543335080147, "clip_ratio/high_mean": 0.0009423817009519553, "clip_ratio/low_mean": 0.0011413409592933021, "clip_ratio/low_min": 5.572126974584535e-05, "clip_ratio/region_mean": 0.002083722636598395, "epoch": 2.531933508311461, "grad_norm": 0.28323686122894287, "learning_rate": 1e-06, "loss": -0.0924, "step": 1084 }, { "clip_ratio/high_max": 0.0024130911187967286, "clip_ratio/high_mean": 0.0008118570003716741, "clip_ratio/low_mean": 0.0006018565964041045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014137136276985984, "completions/clipped_ratio": 0.2154017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3536.0, "completions/mean_length": 1375.8226318359375, "completions/mean_terminated_length": 629.0313110351562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.5342665500145816, "grad_norm": 0.4908103048801422, "learning_rate": 1e-06, "loss": -0.0734, "num_tokens": 165218254.0, "reward": 0.5390625, "reward_std": 0.14075776934623718, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 1085 }, { "clip_ratio/high_max": 0.002882754954043776, "clip_ratio/high_mean": 0.00102870816772338, "clip_ratio/low_mean": 0.0007477255821868312, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017764337644621264, "epoch": 2.536599591717702, "grad_norm": 0.6790070533752441, "learning_rate": 1e-06, "loss": -0.0735, "step": 1086 }, { "clip_ratio/high_max": 0.0031209253138513304, "clip_ratio/high_mean": 0.0010539491158851888, "clip_ratio/low_mean": 0.0009685377772257198, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020224868567311205, "epoch": 2.5389326334208224, "grad_norm": 0.4112752377986908, "learning_rate": 1e-06, "loss": -0.0737, "step": 1087 }, { "clip_ratio/high_max": 0.003320863703265786, "clip_ratio/high_mean": 0.0010708697300287895, "clip_ratio/low_mean": 0.0011177280034644355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021885977766942233, "epoch": 2.5412656751239426, "grad_norm": 0.4200025796890259, "learning_rate": 1e-06, "loss": -0.0738, "step": 1088 }, { "clip_ratio/high_max": 0.003127195559500251, "clip_ratio/high_mean": 0.0011593409162742319, "clip_ratio/low_mean": 0.0008770003250901937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002036341178609291, "completions/clipped_ratio": 0.2388392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 1484.3270263671875, "completions/mean_terminated_length": 664.8284301757812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.5435987168270633, "grad_norm": 16.201557159423828, "learning_rate": 1e-06, "loss": -0.0965, "num_tokens": 165753803.0, "reward": 0.4799107313156128, "reward_std": 0.2068101018667221, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1089 }, { "clip_ratio/high_max": 0.0028990958744543605, "clip_ratio/high_mean": 0.001132745066570351, "clip_ratio/low_mean": 0.0009376046791658155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020703497721115127, "epoch": 2.545931758530184, "grad_norm": 10.441486358642578, "learning_rate": 1e-06, "loss": -0.0997, "step": 1090 }, { "clip_ratio/high_max": 0.0028235201243660413, "clip_ratio/high_mean": 0.001097883636248298, "clip_ratio/low_mean": 0.001067433080606861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021653167495969683, "epoch": 2.548264800233304, "grad_norm": 0.6041067838668823, "learning_rate": 1e-06, "loss": -0.1003, "step": 1091 }, { "clip_ratio/high_max": 0.003643646923592314, "clip_ratio/high_mean": 0.0013447672936308663, "clip_ratio/low_mean": 0.0012730230209854199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026177902618655935, "epoch": 2.5505978419364244, "grad_norm": 0.5262669324874878, "learning_rate": 1e-06, "loss": -0.1007, "step": 1092 }, { "clip_ratio/high_max": 0.0029348348252824508, "clip_ratio/high_mean": 0.0011955982809013221, "clip_ratio/low_mean": 0.000538516325832461, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017341146158287302, "completions/clipped_ratio": 0.2600446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4021.0, "completions/mean_length": 1615.1004638671875, "completions/mean_terminated_length": 743.2307739257812, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 2.552930883639545, "grad_norm": 0.3676765263080597, "learning_rate": 1e-06, "loss": -0.1079, "num_tokens": 166331373.0, "reward": 0.4486607313156128, "reward_std": 0.18314069509506226, "rewards/verify_math_reward/mean": 0.4486607015132904, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1093 }, { "clip_ratio/high_max": 0.002742572956776712, "clip_ratio/high_mean": 0.0011802748158515897, "clip_ratio/low_mean": 0.0007884828992246184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019687577441800386, "epoch": 2.5552639253426657, "grad_norm": 0.27336665987968445, "learning_rate": 1e-06, "loss": -0.1082, "step": 1094 }, { "clip_ratio/high_max": 0.00333934037189465, "clip_ratio/high_mean": 0.001354593889118405, "clip_ratio/low_mean": 0.0008846300606819568, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022392239843611605, "epoch": 2.557596967045786, "grad_norm": 0.3089798390865326, "learning_rate": 1e-06, "loss": -0.1083, "step": 1095 }, { "clip_ratio/high_max": 0.002996828072355129, "clip_ratio/high_mean": 0.0012221711785969092, "clip_ratio/low_mean": 0.0010915836410276825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023137547977967188, "epoch": 2.5599300087489065, "grad_norm": 0.5087931752204895, "learning_rate": 1e-06, "loss": -0.1083, "step": 1096 }, { "clip_ratio/high_max": 0.002723040153796319, "clip_ratio/high_mean": 0.0011461611647973768, "clip_ratio/low_mean": 0.0008022474776225863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019484086587908678, "completions/clipped_ratio": 0.2087053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 1416.05810546875, "completions/mean_terminated_length": 709.2186279296875, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 2.5622630504520267, "grad_norm": 0.5769084692001343, "learning_rate": 1e-06, "loss": -0.1088, "num_tokens": 166921289.0, "reward": 0.5133928656578064, "reward_std": 0.1964825689792633, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1097 }, { "clip_ratio/high_max": 0.0032770712714409456, "clip_ratio/high_mean": 0.001337845613306854, "clip_ratio/low_mean": 0.0010016010510298656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023394466552417725, "epoch": 2.5645960921551474, "grad_norm": 0.2779761850833893, "learning_rate": 1e-06, "loss": -0.1091, "step": 1098 }, { "clip_ratio/high_max": 0.0034108413019566797, "clip_ratio/high_mean": 0.0013916532188886777, "clip_ratio/low_mean": 0.0011247762267885264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025164294711430557, "epoch": 2.5669291338582676, "grad_norm": 0.3502052426338196, "learning_rate": 1e-06, "loss": -0.1092, "step": 1099 }, { "clip_ratio/high_max": 0.0033276621397817507, "clip_ratio/high_mean": 0.0013544265384553, "clip_ratio/low_mean": 0.0012050925724906847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025595191254979, "epoch": 2.5692621755613883, "grad_norm": 1.348060965538025, "learning_rate": 1e-06, "loss": -0.109, "step": 1100 }, { "clip_ratio/high_max": 0.003261648045736365, "clip_ratio/high_mean": 0.0015112195505935233, "clip_ratio/low_mean": 0.0005168857942408067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002028105292993132, "completions/clipped_ratio": 0.2165178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3224.0, "completions/mean_length": 1405.2835693359375, "completions/mean_terminated_length": 661.6951293945312, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 2.5715952172645085, "grad_norm": 0.42415884137153625, "learning_rate": 1e-06, "loss": -0.0985, "num_tokens": 167483071.0, "reward": 0.5558035969734192, "reward_std": 0.17348577082157135, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1101 }, { "clip_ratio/high_max": 0.0034620553196873516, "clip_ratio/high_mean": 0.001532508023956325, "clip_ratio/low_mean": 0.0008536560217180522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002386164080235176, "epoch": 2.573928258967629, "grad_norm": 0.3332423269748688, "learning_rate": 1e-06, "loss": -0.0987, "step": 1102 }, { "clip_ratio/high_max": 0.004043851105961949, "clip_ratio/high_mean": 0.001705165188468527, "clip_ratio/low_mean": 0.0007879013419369585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024930664658313617, "epoch": 2.5762613006707493, "grad_norm": 4.0191144943237305, "learning_rate": 1e-06, "loss": -0.0986, "step": 1103 }, { "clip_ratio/high_max": 0.0036373382972669788, "clip_ratio/high_mean": 0.001472266962082358, "clip_ratio/low_mean": 0.0009357184671898722, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002407985390163958, "epoch": 2.57859434237387, "grad_norm": 0.3068734109401703, "learning_rate": 1e-06, "loss": -0.0989, "step": 1104 }, { "clip_ratio/high_max": 0.002238598928670399, "clip_ratio/high_mean": 0.0007004740091360873, "clip_ratio/low_mean": 0.00038520657835761085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010856805711227935, "completions/clipped_ratio": 0.2020089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3273.0, "completions/mean_length": 1309.8515625, "completions/mean_terminated_length": 604.546875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.5809273840769906, "grad_norm": 0.3221093714237213, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 168002258.0, "reward": 0.5569196939468384, "reward_std": 0.12125468999147415, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1105 }, { "clip_ratio/high_max": 0.002600621053716168, "clip_ratio/high_mean": 0.0008564142353861826, "clip_ratio/low_mean": 0.0004979427831131034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013543570239562541, "epoch": 2.583260425780111, "grad_norm": 0.5847398638725281, "learning_rate": 1e-06, "loss": -0.0645, "step": 1106 }, { "clip_ratio/high_max": 0.0025966732355300337, "clip_ratio/high_mean": 0.0009234250410372624, "clip_ratio/low_mean": 0.0006668909181826166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015903159801382571, "epoch": 2.585593467483231, "grad_norm": 0.29294657707214355, "learning_rate": 1e-06, "loss": -0.0647, "step": 1107 }, { "clip_ratio/high_max": 0.002481639188772533, "clip_ratio/high_mean": 0.0008915475882531609, "clip_ratio/low_mean": 0.0006612833558392595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015528309340879787, "epoch": 2.5879265091863517, "grad_norm": 0.3136427104473114, "learning_rate": 1e-06, "loss": -0.0647, "step": 1108 }, { "clip_ratio/high_max": 0.0023608384872204624, "clip_ratio/high_mean": 0.0010682869906304404, "clip_ratio/low_mean": 0.0007104948817868717, "clip_ratio/low_min": 5.823331048304681e-05, "clip_ratio/region_mean": 0.0017787818323995452, "completions/clipped_ratio": 0.2243303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3934.0, "completions/mean_length": 1447.982177734375, "completions/mean_terminated_length": 682.1525268554688, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 2.5902595508894724, "grad_norm": 0.6740090250968933, "learning_rate": 1e-06, "loss": -0.1181, "num_tokens": 168568946.0, "reward": 0.4620535969734192, "reward_std": 0.20414264500141144, "rewards/verify_math_reward/mean": 0.4620535671710968, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1109 }, { "clip_ratio/high_max": 0.002837435604305938, "clip_ratio/high_mean": 0.0012745397543767467, "clip_ratio/low_mean": 0.0009604546012269566, "clip_ratio/low_min": 2.7636524464469403e-05, "clip_ratio/region_mean": 0.0022349943974404596, "epoch": 2.5925925925925926, "grad_norm": 0.3448570966720581, "learning_rate": 1e-06, "loss": -0.1184, "step": 1110 }, { "clip_ratio/high_max": 0.0029420681748888455, "clip_ratio/high_mean": 0.0013234237048891373, "clip_ratio/low_mean": 0.0010066520881082397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023300757602555677, "epoch": 2.5949256342957128, "grad_norm": 0.319291889667511, "learning_rate": 1e-06, "loss": -0.1185, "step": 1111 }, { "clip_ratio/high_max": 0.002652253591804765, "clip_ratio/high_mean": 0.0012514144837041385, "clip_ratio/low_mean": 0.001184429886052385, "clip_ratio/low_min": 1.1864084626722615e-05, "clip_ratio/region_mean": 0.002435844413412269, "epoch": 2.5972586759988334, "grad_norm": 0.29940861463546753, "learning_rate": 1e-06, "loss": -0.1187, "step": 1112 }, { "clip_ratio/high_max": 0.002024792025622446, "clip_ratio/high_mean": 0.0008153233029588591, "clip_ratio/low_mean": 0.00040916247758104873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012244858044141438, "completions/clipped_ratio": 0.2310267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 1456.376220703125, "completions/mean_terminated_length": 663.339599609375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.599591717701954, "grad_norm": 0.6689534783363342, "learning_rate": 1e-06, "loss": -0.1404, "num_tokens": 169113011.0, "reward": 0.5133928656578064, "reward_std": 0.15680241584777832, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1113 }, { "clip_ratio/high_max": 0.0029982208216097206, "clip_ratio/high_mean": 0.001065076187842351, "clip_ratio/low_mean": 0.0005774909777755965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016425671929027885, "epoch": 2.6019247594050743, "grad_norm": 1811.84814453125, "learning_rate": 1e-06, "loss": -0.0822, "step": 1114 }, { "clip_ratio/high_max": 0.0023074028285918757, "clip_ratio/high_mean": 0.0008775739388511283, "clip_ratio/low_mean": 0.0006217766222107457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014993505828897469, "epoch": 2.604257801108195, "grad_norm": 0.2456456422805786, "learning_rate": 1e-06, "loss": -0.1406, "step": 1115 }, { "clip_ratio/high_max": 0.0027369309318601154, "clip_ratio/high_mean": 0.0009234424614987802, "clip_ratio/low_mean": 0.0007131405009204173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001636582976061618, "epoch": 2.606590842811315, "grad_norm": 0.45565861463546753, "learning_rate": 1e-06, "loss": -0.1406, "step": 1116 }, { "clip_ratio/high_max": 0.002747816077317111, "clip_ratio/high_mean": 0.0010218096686003264, "clip_ratio/low_mean": 0.000619154072865058, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016409637464676052, "completions/clipped_ratio": 0.2142857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 1369.5469970703125, "completions/mean_terminated_length": 625.96875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 2.608923884514436, "grad_norm": 0.7953383326530457, "learning_rate": 1e-06, "loss": -0.0843, "num_tokens": 169638077.0, "reward": 0.5290178656578064, "reward_std": 0.16739985346794128, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1117 }, { "clip_ratio/high_max": 0.003091791986662429, "clip_ratio/high_mean": 0.0012057667263434269, "clip_ratio/low_mean": 0.0008026635414353223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020084301941096783, "epoch": 2.611256926217556, "grad_norm": 0.3928907513618469, "learning_rate": 1e-06, "loss": -0.0845, "step": 1118 }, { "clip_ratio/high_max": 0.003543371902196668, "clip_ratio/high_mean": 0.0012741020273097092, "clip_ratio/low_mean": 0.001016939710098086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022910417537787, "epoch": 2.6135899679206767, "grad_norm": 0.3165361285209656, "learning_rate": 1e-06, "loss": -0.0847, "step": 1119 }, { "clip_ratio/high_max": 0.0030116342022665776, "clip_ratio/high_mean": 0.0011942537039431045, "clip_ratio/low_mean": 0.001103612292354228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022978659690124914, "epoch": 2.615923009623797, "grad_norm": 0.529883623123169, "learning_rate": 1e-06, "loss": -0.0847, "step": 1120 }, { "clip_ratio/high_max": 0.0024848664470482618, "clip_ratio/high_mean": 0.0010485216007509734, "clip_ratio/low_mean": 0.0006189896130308625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016675112347002141, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3298.0, "completions/mean_length": 1200.454345703125, "completions/mean_terminated_length": 599.4918823242188, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 2.6182560513269175, "grad_norm": 1.3846943378448486, "learning_rate": 1e-06, "loss": -0.0956, "num_tokens": 170163820.0, "reward": 0.5770089626312256, "reward_std": 0.18295595049858093, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1121 }, { "clip_ratio/high_max": 0.002593300538137555, "clip_ratio/high_mean": 0.0011367023835191503, "clip_ratio/low_mean": 0.0006657016401732108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001802404032787308, "epoch": 2.6205890930300377, "grad_norm": 0.6504513025283813, "learning_rate": 1e-06, "loss": -0.0959, "step": 1122 }, { "clip_ratio/high_max": 0.002823696020641364, "clip_ratio/high_mean": 0.001175528326712083, "clip_ratio/low_mean": 0.0009673598942754325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021428882755571976, "epoch": 2.6229221347331584, "grad_norm": 0.33897146582603455, "learning_rate": 1e-06, "loss": -0.0962, "step": 1123 }, { "clip_ratio/high_max": 0.0027405195651226677, "clip_ratio/high_mean": 0.0011859714140882716, "clip_ratio/low_mean": 0.0010560815298958914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022420529858209193, "epoch": 2.625255176436279, "grad_norm": 0.2904823422431946, "learning_rate": 1e-06, "loss": -0.0962, "step": 1124 }, { "clip_ratio/high_max": 0.002671617694431916, "clip_ratio/high_mean": 0.0010329381439078134, "clip_ratio/low_mean": 0.000761073310059146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017940114048542455, "completions/clipped_ratio": 0.1808035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3348.0, "completions/mean_length": 1263.12060546875, "completions/mean_terminated_length": 637.880126953125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.6275882181393992, "grad_norm": 0.32910025119781494, "learning_rate": 1e-06, "loss": -0.0856, "num_tokens": 170716688.0, "reward": 0.613839328289032, "reward_std": 0.15842002630233765, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1125 }, { "clip_ratio/high_max": 0.0029186287210904993, "clip_ratio/high_mean": 0.001100414479878964, "clip_ratio/low_mean": 0.0008873164952092338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019877310187439434, "epoch": 2.6299212598425195, "grad_norm": 2.1768527030944824, "learning_rate": 1e-06, "loss": -0.0854, "step": 1126 }, { "clip_ratio/high_max": 0.002809271158184856, "clip_ratio/high_mean": 0.0011631883317022584, "clip_ratio/low_mean": 0.0009846958491834812, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002147884173609782, "epoch": 2.63225430154564, "grad_norm": 0.6363303065299988, "learning_rate": 1e-06, "loss": -0.0857, "step": 1127 }, { "clip_ratio/high_max": 0.003005076723638922, "clip_ratio/high_mean": 0.0011632474124780856, "clip_ratio/low_mean": 0.0010633265919750556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022265739826252684, "epoch": 2.6345873432487608, "grad_norm": 0.34618237614631653, "learning_rate": 1e-06, "loss": -0.0859, "step": 1128 }, { "clip_ratio/high_max": 0.003163040863000788, "clip_ratio/high_mean": 0.0010369830943091074, "clip_ratio/low_mean": 0.0006128021541371709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016497852775501087, "completions/clipped_ratio": 0.2042410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3732.0, "completions/mean_length": 1328.915283203125, "completions/mean_terminated_length": 618.7096557617188, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 2.636920384951881, "grad_norm": 0.28497305512428284, "learning_rate": 1e-06, "loss": -0.0976, "num_tokens": 171238340.0, "reward": 0.5178571939468384, "reward_std": 0.16070660948753357, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1129 }, { "clip_ratio/high_max": 0.003044958215468796, "clip_ratio/high_mean": 0.0010268721052852925, "clip_ratio/low_mean": 0.0008082405893219402, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018351126745983493, "epoch": 2.6392534266550016, "grad_norm": 0.4064182937145233, "learning_rate": 1e-06, "loss": -0.0976, "step": 1130 }, { "clip_ratio/high_max": 0.003228273526474368, "clip_ratio/high_mean": 0.0011854440490424167, "clip_ratio/low_mean": 0.0008247964915426564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020102405396755785, "epoch": 2.641586468358122, "grad_norm": 38.411399841308594, "learning_rate": 1e-06, "loss": -0.0961, "step": 1131 }, { "clip_ratio/high_max": 0.0029341832123463973, "clip_ratio/high_mean": 0.0010485752718523145, "clip_ratio/low_mean": 0.0009154648760159034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019640401733340696, "epoch": 2.6439195100612425, "grad_norm": 0.3218270540237427, "learning_rate": 1e-06, "loss": -0.0978, "step": 1132 }, { "clip_ratio/high_max": 0.003128078635199927, "clip_ratio/high_mean": 0.0013599506000900874, "clip_ratio/low_mean": 0.0009093163953366457, "clip_ratio/low_min": 1.888788210635539e-05, "clip_ratio/region_mean": 0.0022692670099786483, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 1140.2835693359375, "completions/mean_terminated_length": 638.6605834960938, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 2.6462525517643627, "grad_norm": 0.45187193155288696, "learning_rate": 1e-06, "loss": -0.081, "num_tokens": 171822810.0, "reward": 0.5491071939468384, "reward_std": 0.23007933795452118, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1133 }, { "clip_ratio/high_max": 0.003938625821319874, "clip_ratio/high_mean": 0.001572631150338566, "clip_ratio/low_mean": 0.0012122987136535812, "clip_ratio/low_min": 7.665810153412167e-05, "clip_ratio/region_mean": 0.002784929907647893, "epoch": 2.6485855934674833, "grad_norm": 0.42039352655410767, "learning_rate": 1e-06, "loss": -0.0813, "step": 1134 }, { "clip_ratio/high_max": 0.00367605900100898, "clip_ratio/high_mean": 0.0015539925479970407, "clip_ratio/low_mean": 0.0012935690792801324, "clip_ratio/low_min": 7.892544272181112e-05, "clip_ratio/region_mean": 0.0028475616345531307, "epoch": 2.6509186351706036, "grad_norm": 0.5937815308570862, "learning_rate": 1e-06, "loss": -0.0814, "step": 1135 }, { "clip_ratio/high_max": 0.003293553054390941, "clip_ratio/high_mean": 0.0014502697122225072, "clip_ratio/low_mean": 0.0015203051771095488, "clip_ratio/low_min": 6.533915984618943e-05, "clip_ratio/region_mean": 0.0029705748456763104, "epoch": 2.653251676873724, "grad_norm": 0.35216888785362244, "learning_rate": 1e-06, "loss": -0.0816, "step": 1136 }, { "clip_ratio/high_max": 0.0026569641777314246, "clip_ratio/high_mean": 0.0010453657996549737, "clip_ratio/low_mean": 0.00046456411109829787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001509929930762155, "completions/clipped_ratio": 0.1986607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3261.0, "completions/mean_length": 1334.618408203125, "completions/mean_terminated_length": 650.041748046875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.6555847185768444, "grad_norm": 0.418119877576828, "learning_rate": 1e-06, "loss": -0.0805, "num_tokens": 172379108.0, "reward": 0.5401785969734192, "reward_std": 0.16548165678977966, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 1137 }, { "clip_ratio/high_max": 0.0033592411637073383, "clip_ratio/high_mean": 0.001202950417791726, "clip_ratio/low_mean": 0.0006354800334520405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018384304312348831, "epoch": 2.657917760279965, "grad_norm": 0.6130971908569336, "learning_rate": 1e-06, "loss": -0.0807, "step": 1138 }, { "clip_ratio/high_max": 0.002843533191480674, "clip_ratio/high_mean": 0.0011432552200858481, "clip_ratio/low_mean": 0.0006709004319418455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018141556611226406, "epoch": 2.6602508019830857, "grad_norm": 0.2924841344356537, "learning_rate": 1e-06, "loss": -0.0808, "step": 1139 }, { "clip_ratio/high_max": 0.00305314327852102, "clip_ratio/high_mean": 0.0012556427755043842, "clip_ratio/low_mean": 0.00088906003475131, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021447028120746836, "epoch": 2.662583843686206, "grad_norm": 0.3281480371952057, "learning_rate": 1e-06, "loss": -0.081, "step": 1140 }, { "clip_ratio/high_max": 0.002709077722101938, "clip_ratio/high_mean": 0.000940929396165302, "clip_ratio/low_mean": 0.0005478688526636688, "clip_ratio/low_min": 1.8032313164439984e-05, "clip_ratio/region_mean": 0.001488798254285939, "completions/clipped_ratio": 0.2466517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 1507.376220703125, "completions/mean_terminated_length": 659.8414916992188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.664916885389326, "grad_norm": 0.6969342231750488, "learning_rate": 1e-06, "loss": -0.0959, "num_tokens": 172913461.0, "reward": 0.4620535969734192, "reward_std": 0.17708687484264374, "rewards/verify_math_reward/mean": 0.4620535671710968, "rewards/verify_math_reward/std": 0.4988364279270172, "step": 1141 }, { "clip_ratio/high_max": 0.0028218030420248397, "clip_ratio/high_mean": 0.0010632010853441898, "clip_ratio/low_mean": 0.0007978456651471788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018610467232065275, "epoch": 2.667249927092447, "grad_norm": 0.46112796664237976, "learning_rate": 1e-06, "loss": -0.0962, "step": 1142 }, { "clip_ratio/high_max": 0.0032739845701144077, "clip_ratio/high_mean": 0.0012285602097108494, "clip_ratio/low_mean": 0.0009737260570545914, "clip_ratio/low_min": 1.8032313164439984e-05, "clip_ratio/region_mean": 0.002202286254032515, "epoch": 2.6695829687955674, "grad_norm": 0.28367793560028076, "learning_rate": 1e-06, "loss": -0.0964, "step": 1143 }, { "clip_ratio/high_max": 0.0029089382078382187, "clip_ratio/high_mean": 0.0010686329806048889, "clip_ratio/low_mean": 0.0010766160321509233, "clip_ratio/low_min": 3.294097041361965e-05, "clip_ratio/region_mean": 0.00214524898910895, "epoch": 2.6719160104986877, "grad_norm": 0.46802031993865967, "learning_rate": 1e-06, "loss": -0.0964, "step": 1144 }, { "clip_ratio/high_max": 0.002234045328805223, "clip_ratio/high_mean": 0.0008303530084958766, "clip_ratio/low_mean": 0.0006504649991256883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001480818016716512, "completions/clipped_ratio": 0.2209821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3882.0, "completions/mean_length": 1438.5748291015625, "completions/mean_terminated_length": 684.7493286132812, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.674249052201808, "grad_norm": 13.241521835327148, "learning_rate": 1e-06, "loss": -0.0555, "num_tokens": 173486032.0, "reward": 0.4754464626312256, "reward_std": 0.1437215656042099, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756315231323, "step": 1145 }, { "clip_ratio/high_max": 0.0020223169813107233, "clip_ratio/high_mean": 0.0007725869581918232, "clip_ratio/low_mean": 0.0006849648616480408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014575518252968322, "epoch": 2.6765820939049285, "grad_norm": 0.33175984025001526, "learning_rate": 1e-06, "loss": -0.0559, "step": 1146 }, { "clip_ratio/high_max": 0.0023865793191362172, "clip_ratio/high_mean": 0.0008635350677650422, "clip_ratio/low_mean": 0.0008964863518485799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001760021405061707, "epoch": 2.678915135608049, "grad_norm": 1.2590866088867188, "learning_rate": 1e-06, "loss": -0.056, "step": 1147 }, { "clip_ratio/high_max": 0.002634438991663046, "clip_ratio/high_mean": 0.0009965031622414244, "clip_ratio/low_mean": 0.0009439705863769632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019404737759032287, "epoch": 2.6812481773111694, "grad_norm": 0.5095630884170532, "learning_rate": 1e-06, "loss": -0.0561, "step": 1148 }, { "clip_ratio/high_max": 0.002346159133594483, "clip_ratio/high_mean": 0.0009859569763648324, "clip_ratio/low_mean": 0.0005352786920411745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001521235655673081, "completions/clipped_ratio": 0.1830357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3382.0, "completions/mean_length": 1292.875, "completions/mean_terminated_length": 664.8524169921875, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 2.68358121901429, "grad_norm": 0.8633759617805481, "learning_rate": 1e-06, "loss": -0.0697, "num_tokens": 174057552.0, "reward": 0.5301339626312256, "reward_std": 0.15928654372692108, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1149 }, { "clip_ratio/high_max": 0.002377275624894537, "clip_ratio/high_mean": 0.00102085448543221, "clip_ratio/low_mean": 0.0006879462214328669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017088007371057756, "epoch": 2.6859142607174102, "grad_norm": 0.35880351066589355, "learning_rate": 1e-06, "loss": -0.0701, "step": 1150 }, { "clip_ratio/high_max": 0.002441424370772438, "clip_ratio/high_mean": 0.0010906849611274083, "clip_ratio/low_mean": 0.0008116939304727566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019023788918275386, "epoch": 2.688247302420531, "grad_norm": 0.6140947341918945, "learning_rate": 1e-06, "loss": -0.0702, "step": 1151 }, { "clip_ratio/high_max": 0.0026908730433206074, "clip_ratio/high_mean": 0.001115784996727598, "clip_ratio/low_mean": 0.001018672070131288, "clip_ratio/low_min": 1.7650381778366864e-05, "clip_ratio/region_mean": 0.0021344570923247375, "epoch": 2.690580344123651, "grad_norm": 0.5297927856445312, "learning_rate": 1e-06, "loss": -0.0703, "step": 1152 }, { "clip_ratio/high_max": 0.00229253667203011, "clip_ratio/high_mean": 0.0008567258309994941, "clip_ratio/low_mean": 0.0006271222564464551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014838480856269598, "completions/clipped_ratio": 0.2455357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3193.0, "completions/mean_length": 1493.4888916015625, "completions/mean_terminated_length": 646.5177612304688, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 2.6929133858267718, "grad_norm": 0.4879203736782074, "learning_rate": 1e-06, "loss": -0.081, "num_tokens": 174578886.0, "reward": 0.5066964626312256, "reward_std": 0.14327649772167206, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1153 }, { "clip_ratio/high_max": 0.002820141162374057, "clip_ratio/high_mean": 0.0010485778602742357, "clip_ratio/low_mean": 0.0008111345896395505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018597124435473233, "epoch": 2.695246427529892, "grad_norm": 1.025081753730774, "learning_rate": 1e-06, "loss": -0.0813, "step": 1154 }, { "clip_ratio/high_max": 0.002287321512994822, "clip_ratio/high_mean": 0.0008796608053671662, "clip_ratio/low_mean": 0.0007927283941171481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001672389182203915, "epoch": 2.6975794692330126, "grad_norm": 0.45984700322151184, "learning_rate": 1e-06, "loss": -0.0814, "step": 1155 }, { "clip_ratio/high_max": 0.002736065460339887, "clip_ratio/high_mean": 0.000941986949328566, "clip_ratio/low_mean": 0.0010106050467584282, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019525920142768882, "epoch": 2.699912510936133, "grad_norm": 0.24206320941448212, "learning_rate": 1e-06, "loss": -0.0815, "step": 1156 }, { "clip_ratio/high_max": 0.002704082704440225, "clip_ratio/high_mean": 0.0010917078016063897, "clip_ratio/low_mean": 0.0006502630403701914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017419708674424328, "completions/clipped_ratio": 0.2075892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 1336.1429443359375, "completions/mean_terminated_length": 613.1380004882812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.7022455526392535, "grad_norm": 0.5276772975921631, "learning_rate": 1e-06, "loss": -0.0794, "num_tokens": 175101662.0, "reward": 0.527901828289032, "reward_std": 0.19422627985477448, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1157 }, { "clip_ratio/high_max": 0.0036155347333988175, "clip_ratio/high_mean": 0.001260407882000436, "clip_ratio/low_mean": 0.0009019947065098677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002162402590329293, "epoch": 2.704578594342374, "grad_norm": 0.44234132766723633, "learning_rate": 1e-06, "loss": -0.0794, "step": 1158 }, { "clip_ratio/high_max": 0.0034713963832473382, "clip_ratio/high_mean": 0.0012998216807318386, "clip_ratio/low_mean": 0.0010530417966947425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002352863441046793, "epoch": 2.7069116360454943, "grad_norm": 0.3450758159160614, "learning_rate": 1e-06, "loss": -0.0788, "step": 1159 }, { "clip_ratio/high_max": 0.00350081647047773, "clip_ratio/high_mean": 0.0012667452174355276, "clip_ratio/low_mean": 0.0011900519093615003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024567971340729855, "epoch": 2.7092446777486145, "grad_norm": 0.34673169255256653, "learning_rate": 1e-06, "loss": -0.0798, "step": 1160 }, { "clip_ratio/high_max": 0.002561336717917584, "clip_ratio/high_mean": 0.001071713282726705, "clip_ratio/low_mean": 0.0004978662182111293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015695795482315589, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3651.0, "completions/mean_length": 1256.052490234375, "completions/mean_terminated_length": 680.43896484375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 2.711577719451735, "grad_norm": 0.4483858644962311, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 175698885.0, "reward": 0.5814732313156128, "reward_std": 0.14500966668128967, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1161 }, { "clip_ratio/high_max": 0.002734059155045543, "clip_ratio/high_mean": 0.0010555035623838194, "clip_ratio/low_mean": 0.0006368004378600745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016923040093388408, "epoch": 2.713910761154856, "grad_norm": 0.2480604201555252, "learning_rate": 1e-06, "loss": -0.0693, "step": 1162 }, { "clip_ratio/high_max": 0.002743198892858345, "clip_ratio/high_mean": 0.0011967169793933863, "clip_ratio/low_mean": 0.0006405044732673559, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001837221447203774, "epoch": 2.716243802857976, "grad_norm": 0.22542735934257507, "learning_rate": 1e-06, "loss": -0.0694, "step": 1163 }, { "clip_ratio/high_max": 0.0028109701015637256, "clip_ratio/high_mean": 0.0011083299432357308, "clip_ratio/low_mean": 0.0007722217160335276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018805516519933008, "epoch": 2.7185768445610963, "grad_norm": 0.24027276039123535, "learning_rate": 1e-06, "loss": -0.0695, "step": 1164 }, { "clip_ratio/high_max": 0.002535520172386896, "clip_ratio/high_mean": 0.0009264497912226943, "clip_ratio/low_mean": 0.0007382482308457838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001664698007516563, "completions/clipped_ratio": 0.2377232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3195.0, "completions/mean_length": 1468.7913818359375, "completions/mean_terminated_length": 649.471435546875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 2.720909886264217, "grad_norm": 0.5111014246940613, "learning_rate": 1e-06, "loss": -0.0825, "num_tokens": 176244842.0, "reward": 0.4676339626312256, "reward_std": 0.16078400611877441, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1165 }, { "clip_ratio/high_max": 0.0032525707574677654, "clip_ratio/high_mean": 0.0011257290880166693, "clip_ratio/low_mean": 0.0008191775759769371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019449066458037123, "epoch": 2.7232429279673376, "grad_norm": 0.3928147256374359, "learning_rate": 1e-06, "loss": -0.0828, "step": 1166 }, { "clip_ratio/high_max": 0.003005642312928103, "clip_ratio/high_mean": 0.0011073992409365019, "clip_ratio/low_mean": 0.0010735299856605707, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002180929215683136, "epoch": 2.725575969670458, "grad_norm": 0.31387534737586975, "learning_rate": 1e-06, "loss": -0.083, "step": 1167 }, { "clip_ratio/high_max": 0.003178802609909326, "clip_ratio/high_mean": 0.0011590168778639054, "clip_ratio/low_mean": 0.0011903664508281508, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002349383292312268, "epoch": 2.7279090113735784, "grad_norm": 0.3557435870170593, "learning_rate": 1e-06, "loss": -0.0831, "step": 1168 }, { "clip_ratio/high_max": 0.0023208540442283265, "clip_ratio/high_mean": 0.0009330180309916614, "clip_ratio/low_mean": 0.0005025064037909033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014355244493344799, "completions/clipped_ratio": 0.2265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 1473.0804443359375, "completions/mean_terminated_length": 704.7503662109375, "completions/min_length": 200.0, "completions/min_terminated_length": 200.0, "epoch": 2.7302420530766986, "grad_norm": 0.8241352438926697, "learning_rate": 1e-06, "loss": -0.0855, "num_tokens": 176824194.0, "reward": 0.4665178656578064, "reward_std": 0.16262732446193695, "rewards/verify_math_reward/mean": 0.4665178656578064, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1169 }, { "clip_ratio/high_max": 0.0027015641680918634, "clip_ratio/high_mean": 0.0010457208154548425, "clip_ratio/low_mean": 0.0006015631061018212, "clip_ratio/low_min": 2.4719842258491553e-05, "clip_ratio/region_mean": 0.0016472839124617167, "epoch": 2.7325750947798193, "grad_norm": 0.6426844596862793, "learning_rate": 1e-06, "loss": -0.0855, "step": 1170 }, { "clip_ratio/high_max": 0.0025475440415902995, "clip_ratio/high_mean": 0.0010810444000526331, "clip_ratio/low_mean": 0.0007570257330371533, "clip_ratio/low_min": 1.5980567695805803e-05, "clip_ratio/region_mean": 0.00183807012217585, "epoch": 2.7349081364829395, "grad_norm": 0.23336288332939148, "learning_rate": 1e-06, "loss": -0.0858, "step": 1171 }, { "clip_ratio/high_max": 0.0029175605668569915, "clip_ratio/high_mean": 0.0011544715762283886, "clip_ratio/low_mean": 0.0007846912976674503, "clip_ratio/low_min": 1.5980567695805803e-05, "clip_ratio/region_mean": 0.0019391628447920084, "epoch": 2.73724117818606, "grad_norm": 0.4975951313972473, "learning_rate": 1e-06, "loss": -0.0858, "step": 1172 }, { "clip_ratio/high_max": 0.0030730391808901913, "clip_ratio/high_mean": 0.001277351038879715, "clip_ratio/low_mean": 0.0007418803525069961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002019231367739849, "completions/clipped_ratio": 0.2053571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3666.0, "completions/mean_length": 1369.0123291015625, "completions/mean_terminated_length": 664.2850952148438, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 2.7395742198891804, "grad_norm": 0.38559573888778687, "learning_rate": 1e-06, "loss": -0.121, "num_tokens": 177397997.0, "reward": 0.5033482313156128, "reward_std": 0.184984028339386, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1173 }, { "clip_ratio/high_max": 0.0032080450182547793, "clip_ratio/high_mean": 0.0012933712314406876, "clip_ratio/low_mean": 0.0009027979558595689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021961692182230763, "epoch": 2.741907261592301, "grad_norm": 0.5801857709884644, "learning_rate": 1e-06, "loss": -0.1209, "step": 1174 }, { "clip_ratio/high_max": 0.003475419849564787, "clip_ratio/high_mean": 0.001406010294886073, "clip_ratio/low_mean": 0.0010067024304589722, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002412712732621003, "epoch": 2.7442403032954212, "grad_norm": 0.40939491987228394, "learning_rate": 1e-06, "loss": -0.1211, "step": 1175 }, { "clip_ratio/high_max": 0.0033730757641023956, "clip_ratio/high_mean": 0.0012840911876992323, "clip_ratio/low_mean": 0.0012767023035848979, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025607934658182785, "epoch": 2.746573344998542, "grad_norm": 0.260544091463089, "learning_rate": 1e-06, "loss": -0.1213, "step": 1176 }, { "clip_ratio/high_max": 0.003310453364974819, "clip_ratio/high_mean": 0.0011787906823883532, "clip_ratio/low_mean": 0.000666306236780656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018450969437253661, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3759.0, "completions/mean_length": 1279.7098388671875, "completions/mean_terminated_length": 634.5514526367188, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.7489063867016625, "grad_norm": 0.4941178858280182, "learning_rate": 1e-06, "loss": -0.0978, "num_tokens": 177946505.0, "reward": 0.5479910969734192, "reward_std": 0.19517098367214203, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 1177 }, { "clip_ratio/high_max": 0.003174037155986298, "clip_ratio/high_mean": 0.0012483538921514992, "clip_ratio/low_mean": 0.0008706993994564982, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002119053249771241, "epoch": 2.7512394284047827, "grad_norm": 2.1607589721679688, "learning_rate": 1e-06, "loss": -0.0981, "step": 1178 }, { "clip_ratio/high_max": 0.0028290177724556997, "clip_ratio/high_mean": 0.0011984598386334255, "clip_ratio/low_mean": 0.0009329210479336325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021313808974809945, "epoch": 2.753572470107903, "grad_norm": 0.3275543451309204, "learning_rate": 1e-06, "loss": -0.0983, "step": 1179 }, { "clip_ratio/high_max": 0.0030925701576052234, "clip_ratio/high_mean": 0.001183344562377897, "clip_ratio/low_mean": 0.0011141808518004837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022975253814365715, "epoch": 2.7559055118110236, "grad_norm": 0.3744140565395355, "learning_rate": 1e-06, "loss": -0.0983, "step": 1180 }, { "clip_ratio/high_max": 0.0027759652220993303, "clip_ratio/high_mean": 0.0012045109851896996, "clip_ratio/low_mean": 0.0006114456891737063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018159566752729006, "completions/clipped_ratio": 0.21875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3585.0, "completions/mean_length": 1412.524658203125, "completions/mean_terminated_length": 661.1514282226562, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 2.7582385535141443, "grad_norm": 5.215410232543945, "learning_rate": 1e-06, "loss": -0.1067, "num_tokens": 178497975.0, "reward": 0.5, "reward_std": 0.19982153177261353, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 1181 }, { "clip_ratio/high_max": 0.0025053154786292, "clip_ratio/high_mean": 0.0012082497996743768, "clip_ratio/low_mean": 0.0006256060742089176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018338558511459269, "epoch": 2.7605715952172645, "grad_norm": 0.33788397908210754, "learning_rate": 1e-06, "loss": -0.107, "step": 1182 }, { "clip_ratio/high_max": 0.0028168978496978525, "clip_ratio/high_mean": 0.00139375053186086, "clip_ratio/low_mean": 0.0007377244619419798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021314749828889035, "epoch": 2.7629046369203847, "grad_norm": 0.5041782259941101, "learning_rate": 1e-06, "loss": -0.1071, "step": 1183 }, { "clip_ratio/high_max": 0.003248566121328622, "clip_ratio/high_mean": 0.0013238475403341, "clip_ratio/low_mean": 0.0008928354200179456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022166829730849713, "epoch": 2.7652376786235053, "grad_norm": 0.956540048122406, "learning_rate": 1e-06, "loss": -0.1073, "step": 1184 }, { "clip_ratio/high_max": 0.002682205456949305, "clip_ratio/high_mean": 0.0011247242146055214, "clip_ratio/low_mean": 0.0007325166880036704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018572409026091918, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3174.0, "completions/mean_length": 1280.875, "completions/mean_terminated_length": 635.9835205078125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.767570720326626, "grad_norm": 620.8098754882812, "learning_rate": 1e-06, "loss": -0.0617, "num_tokens": 179049895.0, "reward": 0.4843750298023224, "reward_std": 0.19806334376335144, "rewards/verify_math_reward/mean": 0.484375, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1185 }, { "clip_ratio/high_max": 0.002695905030122958, "clip_ratio/high_mean": 0.001189483704365557, "clip_ratio/low_mean": 0.0007673921663808869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019568758725654334, "epoch": 2.769903762029746, "grad_norm": 0.43561306595802307, "learning_rate": 1e-06, "loss": -0.0971, "step": 1186 }, { "clip_ratio/high_max": 0.002758124355750624, "clip_ratio/high_mean": 0.0012929525764775462, "clip_ratio/low_mean": 0.0009385263638250763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002231479018519167, "epoch": 2.772236803732867, "grad_norm": 0.770324170589447, "learning_rate": 1e-06, "loss": -0.0971, "step": 1187 }, { "clip_ratio/high_max": 0.0030686905229231343, "clip_ratio/high_mean": 0.001354892074232339, "clip_ratio/low_mean": 0.0011114366971014533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024663287622388452, "epoch": 2.774569845435987, "grad_norm": 0.3574642241001129, "learning_rate": 1e-06, "loss": -0.0974, "step": 1188 }, { "clip_ratio/high_max": 0.0026154229817620944, "clip_ratio/high_mean": 0.0010374783396400744, "clip_ratio/low_mean": 0.0007542430430476088, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017917213481268845, "completions/clipped_ratio": 0.2198660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3420.0, "completions/mean_length": 1388.01904296875, "completions/mean_terminated_length": 624.8255004882812, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.7769028871391077, "grad_norm": 0.4097230136394501, "learning_rate": 1e-06, "loss": -0.0783, "num_tokens": 179578920.0, "reward": 0.5133928656578064, "reward_std": 0.17314808070659637, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1189 }, { "clip_ratio/high_max": 0.0029850793216610327, "clip_ratio/high_mean": 0.001117129215344903, "clip_ratio/low_mean": 0.0009277236695197644, "clip_ratio/low_min": 1.780626735126134e-05, "clip_ratio/region_mean": 0.0020448528666747734, "epoch": 2.779235928842228, "grad_norm": 0.30736514925956726, "learning_rate": 1e-06, "loss": -0.0784, "step": 1190 }, { "clip_ratio/high_max": 0.003480989471427165, "clip_ratio/high_mean": 0.0012734314077533782, "clip_ratio/low_mean": 0.000981102375590126, "clip_ratio/low_min": 1.780626735126134e-05, "clip_ratio/region_mean": 0.0022545338215422817, "epoch": 2.7815689705453486, "grad_norm": 0.24554550647735596, "learning_rate": 1e-06, "loss": -0.0786, "step": 1191 }, { "clip_ratio/high_max": 0.0026999378351320047, "clip_ratio/high_mean": 0.0010141140228370205, "clip_ratio/low_mean": 0.0010961974257952534, "clip_ratio/low_min": 3.561253470252268e-05, "clip_ratio/region_mean": 0.002110311485012062, "epoch": 2.783902012248469, "grad_norm": 0.27114391326904297, "learning_rate": 1e-06, "loss": -0.0786, "step": 1192 }, { "clip_ratio/high_max": 0.003692436068376992, "clip_ratio/high_mean": 0.001422491444827756, "clip_ratio/low_mean": 0.0007696351385675371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002192126597947208, "completions/clipped_ratio": 0.21875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3356.0, "completions/mean_length": 1406.8829345703125, "completions/mean_terminated_length": 653.9299926757812, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 2.7862350539515894, "grad_norm": 4.933393955230713, "learning_rate": 1e-06, "loss": -0.1046, "num_tokens": 180124503.0, "reward": 0.486607164144516, "reward_std": 0.20534475147724152, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1193 }, { "clip_ratio/high_max": 0.0035042766321566887, "clip_ratio/high_mean": 0.0014380086431629024, "clip_ratio/low_mean": 0.0008770264430495445, "clip_ratio/low_min": 4.01464531023521e-05, "clip_ratio/region_mean": 0.002315035140782129, "epoch": 2.7885680956547096, "grad_norm": 0.36338376998901367, "learning_rate": 1e-06, "loss": -0.1047, "step": 1194 }, { "clip_ratio/high_max": 0.004006888462754432, "clip_ratio/high_mean": 0.0016332374361809343, "clip_ratio/low_mean": 0.0010336481645936146, "clip_ratio/low_min": 1.3320545804162975e-05, "clip_ratio/region_mean": 0.0026668855934985913, "epoch": 2.7909011373578303, "grad_norm": 0.41212084889411926, "learning_rate": 1e-06, "loss": -0.1049, "step": 1195 }, { "clip_ratio/high_max": 0.00416826170112472, "clip_ratio/high_mean": 0.0016723525550332852, "clip_ratio/low_mean": 0.0013030424543103436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002975394992972724, "epoch": 2.793234179060951, "grad_norm": 3.8023581504821777, "learning_rate": 1e-06, "loss": -0.1051, "step": 1196 }, { "clip_ratio/high_max": 0.003428379524848424, "clip_ratio/high_mean": 0.0013051098121650284, "clip_ratio/low_mean": 0.0007041274875518866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020092372651561163, "completions/clipped_ratio": 0.2120535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3702.0, "completions/mean_length": 1384.1116943359375, "completions/mean_terminated_length": 654.2832641601562, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 2.795567220764071, "grad_norm": 3.2018189430236816, "learning_rate": 1e-06, "loss": -0.0753, "num_tokens": 180670387.0, "reward": 0.5212053656578064, "reward_std": 0.1738969385623932, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1197 }, { "clip_ratio/high_max": 0.0032099701420520432, "clip_ratio/high_mean": 0.0012308413952268893, "clip_ratio/low_mean": 0.0007118798021110706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019427212318987586, "epoch": 2.7979002624671914, "grad_norm": 0.5896627306938171, "learning_rate": 1e-06, "loss": -0.0755, "step": 1198 }, { "clip_ratio/high_max": 0.003903558201272972, "clip_ratio/high_mean": 0.001447825050490792, "clip_ratio/low_mean": 0.0009633471818233375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002411172212305246, "epoch": 2.800233304170312, "grad_norm": 0.6933762431144714, "learning_rate": 1e-06, "loss": -0.0756, "step": 1199 }, { "clip_ratio/high_max": 0.003815401163592469, "clip_ratio/high_mean": 0.001496369839514955, "clip_ratio/low_mean": 0.0010609604778437642, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002557330379204359, "epoch": 2.8025663458734327, "grad_norm": 0.347586065530777, "learning_rate": 1e-06, "loss": -0.0758, "step": 1200 }, { "clip_ratio/high_max": 0.0024983824005175848, "clip_ratio/high_mean": 0.0009618934927857481, "clip_ratio/low_mean": 0.0004857996718783397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014476931137323845, "completions/clipped_ratio": 0.1886160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3885.0, "completions/mean_length": 1289.109375, "completions/mean_terminated_length": 636.6134643554688, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.804899387576553, "grad_norm": 0.5528049468994141, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 181213853.0, "reward": 0.512276828289032, "reward_std": 0.15356901288032532, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 1201 }, { "clip_ratio/high_max": 0.0034367400003247894, "clip_ratio/high_mean": 0.0012816650141758146, "clip_ratio/low_mean": 0.0007585158200527076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020401808142196387, "epoch": 2.8072324292796735, "grad_norm": 2.007758378982544, "learning_rate": 1e-06, "loss": -0.0532, "step": 1202 }, { "clip_ratio/high_max": 0.002915565826697275, "clip_ratio/high_mean": 0.0011328729524393566, "clip_ratio/low_mean": 0.0008909382286219625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020238111683283933, "epoch": 2.8095654709827937, "grad_norm": 0.5470172762870789, "learning_rate": 1e-06, "loss": -0.0534, "step": 1203 }, { "clip_ratio/high_max": 0.0028556916950037703, "clip_ratio/high_mean": 0.0011424536041886313, "clip_ratio/low_mean": 0.0009186199586110888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002061073580989614, "epoch": 2.8118985126859144, "grad_norm": 0.3074507713317871, "learning_rate": 1e-06, "loss": -0.0536, "step": 1204 }, { "clip_ratio/high_max": 0.002890833442506846, "clip_ratio/high_mean": 0.001063974850694649, "clip_ratio/low_mean": 0.0007860665500629693, "clip_ratio/low_min": 2.454022796882782e-05, "clip_ratio/region_mean": 0.0018500413971196394, "completions/clipped_ratio": 0.2466517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3500.0, "completions/mean_length": 1491.719970703125, "completions/mean_terminated_length": 639.0592651367188, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 2.8142315543890346, "grad_norm": 0.6052800416946411, "learning_rate": 1e-06, "loss": -0.0946, "num_tokens": 181728906.0, "reward": 0.5066964626312256, "reward_std": 0.17581695318222046, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1205 }, { "clip_ratio/high_max": 0.0036665277657448314, "clip_ratio/high_mean": 0.0013032855385972653, "clip_ratio/low_mean": 0.0010810768744704546, "clip_ratio/low_min": 2.826775198627729e-05, "clip_ratio/region_mean": 0.0023843623785069212, "epoch": 2.8165645960921553, "grad_norm": 0.518386721611023, "learning_rate": 1e-06, "loss": -0.095, "step": 1206 }, { "clip_ratio/high_max": 0.00322691781911999, "clip_ratio/high_mean": 0.001270668344659498, "clip_ratio/low_mean": 0.0013895373103878228, "clip_ratio/low_min": 7.457420724676922e-05, "clip_ratio/region_mean": 0.002660205660504289, "epoch": 2.8188976377952755, "grad_norm": 7.10248327255249, "learning_rate": 1e-06, "loss": -0.0945, "step": 1207 }, { "clip_ratio/high_max": 0.002730156877078116, "clip_ratio/high_mean": 0.0009974023487302475, "clip_ratio/low_mean": 0.0013813719706377015, "clip_ratio/low_min": 6.115733231126796e-05, "clip_ratio/region_mean": 0.0023787742757122032, "epoch": 2.821230679498396, "grad_norm": 0.38240835070610046, "learning_rate": 1e-06, "loss": -0.0951, "step": 1208 }, { "clip_ratio/high_max": 0.002463452867232263, "clip_ratio/high_mean": 0.0010719201964093372, "clip_ratio/low_mean": 0.0006437868869397789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017157070396933705, "completions/clipped_ratio": 0.25, "completions/max_length": 4096.0, "completions/max_terminated_length": 3914.0, "completions/mean_length": 1539.4051513671875, "completions/mean_terminated_length": 687.2068481445312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.8235637212015163, "grad_norm": 0.3710300624370575, "learning_rate": 1e-06, "loss": -0.1162, "num_tokens": 182280573.0, "reward": 0.4877232313156128, "reward_std": 0.18824909627437592, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.5001283884048462, "step": 1209 }, { "clip_ratio/high_max": 0.002727828010392841, "clip_ratio/high_mean": 0.0011866736931551713, "clip_ratio/low_mean": 0.0007590260884171585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019456997833913192, "epoch": 2.825896762904637, "grad_norm": 1.6543059349060059, "learning_rate": 1e-06, "loss": -0.1162, "step": 1210 }, { "clip_ratio/high_max": 0.0026369119659648277, "clip_ratio/high_mean": 0.0011125377532152925, "clip_ratio/low_mean": 0.000776829268943402, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001889367020339705, "epoch": 2.8282298046077576, "grad_norm": 0.4664285480976105, "learning_rate": 1e-06, "loss": -0.1165, "step": 1211 }, { "clip_ratio/high_max": 0.0029276588538778014, "clip_ratio/high_mean": 0.0011920896104129497, "clip_ratio/low_mean": 0.0010239403600280639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022160300504765473, "epoch": 2.830562846310878, "grad_norm": 0.278658926486969, "learning_rate": 1e-06, "loss": -0.1166, "step": 1212 }, { "clip_ratio/high_max": 0.0035842242868966423, "clip_ratio/high_mean": 0.001387277152389288, "clip_ratio/low_mean": 0.0006110582526162034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019983353704446927, "completions/clipped_ratio": 0.2131696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3644.0, "completions/mean_length": 1351.497802734375, "completions/mean_terminated_length": 607.9517822265625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.832895888013998, "grad_norm": 0.4488239288330078, "learning_rate": 1e-06, "loss": -0.095, "num_tokens": 182800203.0, "reward": 0.590401828289032, "reward_std": 0.18306472897529602, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1213 }, { "clip_ratio/high_max": 0.003537690223311074, "clip_ratio/high_mean": 0.0014302880736067891, "clip_ratio/low_mean": 0.0006969175046833698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021272056183079258, "epoch": 2.8352289297171187, "grad_norm": 2.7161247730255127, "learning_rate": 1e-06, "loss": -0.0948, "step": 1214 }, { "clip_ratio/high_max": 0.00353987258131383, "clip_ratio/high_mean": 0.0013497739309968892, "clip_ratio/low_mean": 0.0008653298646095209, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022151037701405585, "epoch": 2.8375619714202394, "grad_norm": 0.32834717631340027, "learning_rate": 1e-06, "loss": -0.0953, "step": 1215 }, { "clip_ratio/high_max": 0.0032589790935162455, "clip_ratio/high_mean": 0.0013617537406389602, "clip_ratio/low_mean": 0.0010463480684848037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002408101827313658, "epoch": 2.8398950131233596, "grad_norm": 0.3416118919849396, "learning_rate": 1e-06, "loss": -0.0953, "step": 1216 }, { "clip_ratio/high_max": 0.002480594426742755, "clip_ratio/high_mean": 0.0010118863101524767, "clip_ratio/low_mean": 0.0006949146099941572, "clip_ratio/low_min": 1.1279552381893154e-05, "clip_ratio/region_mean": 0.0017068009510694537, "completions/clipped_ratio": 0.2399553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3146.0, "completions/mean_length": 1456.9554443359375, "completions/mean_terminated_length": 623.7767944335938, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 2.8422280548264798, "grad_norm": 1.9922235012054443, "learning_rate": 1e-06, "loss": -0.0787, "num_tokens": 183324259.0, "reward": 0.4810267984867096, "reward_std": 0.16841085255146027, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991896748542786, "step": 1217 }, { "clip_ratio/high_max": 0.002733574088779278, "clip_ratio/high_mean": 0.001132412182414555, "clip_ratio/low_mean": 0.0008736134495848091, "clip_ratio/low_min": 2.255910476378631e-05, "clip_ratio/region_mean": 0.002006025635637343, "epoch": 2.8445610965296004, "grad_norm": 0.5489809513092041, "learning_rate": 1e-06, "loss": -0.079, "step": 1218 }, { "clip_ratio/high_max": 0.0031566584948450327, "clip_ratio/high_mean": 0.0011629366235865746, "clip_ratio/low_mean": 0.0009047260209626984, "clip_ratio/low_min": 2.5095361706917174e-05, "clip_ratio/region_mean": 0.0020676626372733153, "epoch": 2.846894138232721, "grad_norm": 0.41222429275512695, "learning_rate": 1e-06, "loss": -0.0793, "step": 1219 }, { "clip_ratio/high_max": 0.0032130885228980333, "clip_ratio/high_mean": 0.0011981366260442883, "clip_ratio/low_mean": 0.0012378124192764517, "clip_ratio/low_min": 6.273840699577704e-05, "clip_ratio/region_mean": 0.0024359490562346764, "epoch": 2.8492271799358413, "grad_norm": 0.28211748600006104, "learning_rate": 1e-06, "loss": -0.0794, "step": 1220 }, { "clip_ratio/high_max": 0.0030643360369140282, "clip_ratio/high_mean": 0.0012038499444315676, "clip_ratio/low_mean": 0.0008457938511128305, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020496437937254086, "completions/clipped_ratio": 0.2120535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3134.0, "completions/mean_length": 1342.1785888671875, "completions/mean_terminated_length": 601.0651245117188, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 2.851560221638962, "grad_norm": 3.534780502319336, "learning_rate": 1e-06, "loss": -0.0813, "num_tokens": 183831747.0, "reward": 0.4988839626312256, "reward_std": 0.1821310967206955, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1221 }, { "clip_ratio/high_max": 0.003080540285736788, "clip_ratio/high_mean": 0.0012711458311969182, "clip_ratio/low_mean": 0.0008333145033248002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021044603636255488, "epoch": 2.853893263342082, "grad_norm": 0.9370055198669434, "learning_rate": 1e-06, "loss": -0.0816, "step": 1222 }, { "clip_ratio/high_max": 0.003431499018915929, "clip_ratio/high_mean": 0.0014178783167153597, "clip_ratio/low_mean": 0.0011217320970899891, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025396103810635395, "epoch": 2.856226305045203, "grad_norm": 0.8093620538711548, "learning_rate": 1e-06, "loss": -0.0819, "step": 1223 }, { "clip_ratio/high_max": 0.0034816244697140064, "clip_ratio/high_mean": 0.0014407533417397644, "clip_ratio/low_mean": 0.0011941119792027166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026348653045715764, "epoch": 2.858559346748323, "grad_norm": 0.5007116794586182, "learning_rate": 1e-06, "loss": -0.0821, "step": 1224 }, { "clip_ratio/high_max": 0.002486470984877087, "clip_ratio/high_mean": 0.000878848464708426, "clip_ratio/low_mean": 0.0007101492537913145, "clip_ratio/low_min": 2.468404454702977e-05, "clip_ratio/region_mean": 0.0015889976857579313, "completions/clipped_ratio": 0.2165178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 1352.4654541015625, "completions/mean_terminated_length": 594.2806396484375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.8608923884514437, "grad_norm": 45.36826705932617, "learning_rate": 1e-06, "loss": -0.0926, "num_tokens": 184332492.0, "reward": 0.5212053656578064, "reward_std": 0.15135660767555237, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1225 }, { "clip_ratio/high_max": 0.0025407037101103924, "clip_ratio/high_mean": 0.0008987198780232575, "clip_ratio/low_mean": 0.0007413639395963401, "clip_ratio/low_min": 3.759963874472305e-05, "clip_ratio/region_mean": 0.001640083864913322, "epoch": 2.863225430154564, "grad_norm": 0.33519247174263, "learning_rate": 1e-06, "loss": -0.0948, "step": 1226 }, { "clip_ratio/high_max": 0.003139946042210795, "clip_ratio/high_mean": 0.0010803164968820056, "clip_ratio/low_mean": 0.0009087411090149544, "clip_ratio/low_min": 2.0388191842357628e-05, "clip_ratio/region_mean": 0.0019890575858880766, "epoch": 2.8655584718576845, "grad_norm": 2.7825427055358887, "learning_rate": 1e-06, "loss": -0.0948, "step": 1227 }, { "clip_ratio/high_max": 0.0027594845087151043, "clip_ratio/high_mean": 0.0008938860219132039, "clip_ratio/low_mean": 0.0009284342413593549, "clip_ratio/low_min": 4.0400776924798265e-05, "clip_ratio/region_mean": 0.0018223202641820535, "epoch": 2.8678915135608047, "grad_norm": 0.42107686400413513, "learning_rate": 1e-06, "loss": -0.095, "step": 1228 }, { "clip_ratio/high_max": 0.002080782134726178, "clip_ratio/high_mean": 0.0008065612182690529, "clip_ratio/low_mean": 0.0006177257600938901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014242869765439536, "completions/clipped_ratio": 0.2042410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 1322.6763916015625, "completions/mean_terminated_length": 610.8695678710938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.8702245552639254, "grad_norm": 0.4197658598423004, "learning_rate": 1e-06, "loss": -0.0864, "num_tokens": 184854850.0, "reward": 0.5212053656578064, "reward_std": 0.14384102821350098, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1229 }, { "clip_ratio/high_max": 0.002723781086388044, "clip_ratio/high_mean": 0.0009233824966941029, "clip_ratio/low_mean": 0.0006971441243877052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016205266329052392, "epoch": 2.872557596967046, "grad_norm": 0.4416787028312683, "learning_rate": 1e-06, "loss": -0.0866, "step": 1230 }, { "clip_ratio/high_max": 0.0028616251693165395, "clip_ratio/high_mean": 0.0009955005862138933, "clip_ratio/low_mean": 0.0008328178791998653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018283184799656738, "epoch": 2.8748906386701663, "grad_norm": 0.23698417842388153, "learning_rate": 1e-06, "loss": -0.0868, "step": 1231 }, { "clip_ratio/high_max": 0.002497818393749185, "clip_ratio/high_mean": 0.0008923851000872673, "clip_ratio/low_mean": 0.0009950727835530415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018874578963732347, "epoch": 2.8772236803732865, "grad_norm": 0.5121062397956848, "learning_rate": 1e-06, "loss": -0.0868, "step": 1232 }, { "clip_ratio/high_max": 0.0029430376962409355, "clip_ratio/high_mean": 0.001153072553279344, "clip_ratio/low_mean": 0.0005323049263097346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016853774795890786, "completions/clipped_ratio": 0.2165178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 1382.53466796875, "completions/mean_terminated_length": 632.6595458984375, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 2.879556722076407, "grad_norm": 0.8464379906654358, "learning_rate": 1e-06, "loss": -0.0936, "num_tokens": 185386113.0, "reward": 0.4720982313156128, "reward_std": 0.18550607562065125, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1233 }, { "clip_ratio/high_max": 0.003369325728272088, "clip_ratio/high_mean": 0.0013082878358545713, "clip_ratio/low_mean": 0.0006272771170188207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019355649201315828, "epoch": 2.8818897637795278, "grad_norm": 0.2877429127693176, "learning_rate": 1e-06, "loss": -0.0938, "step": 1234 }, { "clip_ratio/high_max": 0.003318865223263856, "clip_ratio/high_mean": 0.0013241740944067715, "clip_ratio/low_mean": 0.0006955674680284574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002019741565163713, "epoch": 2.884222805482648, "grad_norm": 0.2611582577228546, "learning_rate": 1e-06, "loss": -0.0939, "step": 1235 }, { "clip_ratio/high_max": 0.0030736109692952596, "clip_ratio/high_mean": 0.0012730339039990213, "clip_ratio/low_mean": 0.0009009982049974496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002174032124457881, "epoch": 2.886555847185768, "grad_norm": 0.30253124237060547, "learning_rate": 1e-06, "loss": -0.094, "step": 1236 }, { "clip_ratio/high_max": 0.0028210761593072675, "clip_ratio/high_mean": 0.0011936814044020139, "clip_ratio/low_mean": 0.0010697407233237755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022634221386397257, "completions/clipped_ratio": 0.2120535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3701.0, "completions/mean_length": 1424.73779296875, "completions/mean_terminated_length": 705.8427734375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 2.888888888888889, "grad_norm": 0.537609338760376, "learning_rate": 1e-06, "loss": -0.089, "num_tokens": 185969774.0, "reward": 0.478794664144516, "reward_std": 0.2011791169643402, "rewards/verify_math_reward/mean": 0.4787946343421936, "rewards/verify_math_reward/std": 0.49982911348342896, "step": 1237 }, { "clip_ratio/high_max": 0.0031311173224821687, "clip_ratio/high_mean": 0.0013510836361092515, "clip_ratio/low_mean": 0.0011667837898130529, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025178674259223044, "epoch": 2.8912219305920095, "grad_norm": 0.41552361845970154, "learning_rate": 1e-06, "loss": -0.0892, "step": 1238 }, { "clip_ratio/high_max": 0.0032139801696757786, "clip_ratio/high_mean": 0.0013723202100663912, "clip_ratio/low_mean": 0.0014265503705246374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027988705769530497, "epoch": 2.8935549722951297, "grad_norm": 0.31522491574287415, "learning_rate": 1e-06, "loss": -0.0895, "step": 1239 }, { "clip_ratio/high_max": 0.0030999285736470483, "clip_ratio/high_mean": 0.0013347479034564458, "clip_ratio/low_mean": 0.0015425061610585544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028772539881174453, "epoch": 2.8958880139982504, "grad_norm": 0.37230515480041504, "learning_rate": 1e-06, "loss": -0.0896, "step": 1240 }, { "clip_ratio/high_max": 0.0029205890532466583, "clip_ratio/high_mean": 0.00104423633456463, "clip_ratio/low_mean": 0.00040260681134896004, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001446843165467726, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3714.0, "completions/mean_length": 1234.536865234375, "completions/mean_terminated_length": 583.8479614257812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.8982210557013706, "grad_norm": 5.425152778625488, "learning_rate": 1e-06, "loss": -0.0854, "num_tokens": 186495167.0, "reward": 0.5959821939468384, "reward_std": 0.13846048712730408, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 1241 }, { "clip_ratio/high_max": 0.0028035906034347136, "clip_ratio/high_mean": 0.001015445196571818, "clip_ratio/low_mean": 0.0004594717684085481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001474916974984808, "epoch": 2.900554097404491, "grad_norm": 0.3006778061389923, "learning_rate": 1e-06, "loss": -0.0859, "step": 1242 }, { "clip_ratio/high_max": 0.0033897928879014216, "clip_ratio/high_mean": 0.001207507191793411, "clip_ratio/low_mean": 0.0004978621097961877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017053693045454565, "epoch": 2.9028871391076114, "grad_norm": 0.9478268027305603, "learning_rate": 1e-06, "loss": -0.086, "step": 1243 }, { "clip_ratio/high_max": 0.0032955030619632453, "clip_ratio/high_mean": 0.0011920046526938677, "clip_ratio/low_mean": 0.0006095888402342098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018015934801951516, "epoch": 2.905220180810732, "grad_norm": 0.2952406704425812, "learning_rate": 1e-06, "loss": -0.0862, "step": 1244 }, { "clip_ratio/high_max": 0.0038782509363954887, "clip_ratio/high_mean": 0.0011188099451828748, "clip_ratio/low_mean": 0.0005431610725281644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016619710695522372, "completions/clipped_ratio": 0.234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 1405.20654296875, "completions/mean_terminated_length": 581.4942016601562, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.9075532225138523, "grad_norm": 0.9449260234832764, "learning_rate": 1e-06, "loss": -0.0835, "num_tokens": 186984416.0, "reward": 0.5691964626312256, "reward_std": 0.1555236279964447, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1245 }, { "clip_ratio/high_max": 0.005117442677146755, "clip_ratio/high_mean": 0.0014180850230331998, "clip_ratio/low_mean": 0.0006414854678951087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002059570477285888, "epoch": 2.909886264216973, "grad_norm": 0.37127840518951416, "learning_rate": 1e-06, "loss": -0.0838, "step": 1246 }, { "clip_ratio/high_max": 0.004142906385823153, "clip_ratio/high_mean": 0.0012283294381632004, "clip_ratio/low_mean": 0.0008153209182637511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002043650427367538, "epoch": 2.912219305920093, "grad_norm": 0.2784992754459381, "learning_rate": 1e-06, "loss": -0.0839, "step": 1247 }, { "clip_ratio/high_max": 0.004981956837582402, "clip_ratio/high_mean": 0.0012748711087624542, "clip_ratio/low_mean": 0.0008956288711488014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021705000326619484, "epoch": 2.914552347623214, "grad_norm": 3.2660763263702393, "learning_rate": 1e-06, "loss": -0.0836, "step": 1248 }, { "clip_ratio/high_max": 0.0029971869153087027, "clip_ratio/high_mean": 0.0012247210070199799, "clip_ratio/low_mean": 0.00046903620295779547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016937572436290793, "completions/clipped_ratio": 0.2544642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3275.0, "completions/mean_length": 1516.810302734375, "completions/mean_terminated_length": 636.488037109375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.9168853893263345, "grad_norm": 3.574822187423706, "learning_rate": 1e-06, "loss": -0.145, "num_tokens": 187503246.0, "reward": 0.4776785969734192, "reward_std": 0.1710023432970047, "rewards/verify_math_reward/mean": 0.4776785671710968, "rewards/verify_math_reward/std": 0.4997805058956146, "step": 1249 }, { "clip_ratio/high_max": 0.0032328522065654397, "clip_ratio/high_mean": 0.0011326784260745626, "clip_ratio/low_mean": 0.0005380096763474285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016706881215213798, "epoch": 2.9192184310294547, "grad_norm": 2.3712987899780273, "learning_rate": 1e-06, "loss": -0.1441, "step": 1250 }, { "clip_ratio/high_max": 0.003412745238165371, "clip_ratio/high_mean": 0.0013647938576468732, "clip_ratio/low_mean": 0.0005849857375324063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019497795728966594, "epoch": 2.921551472732575, "grad_norm": 0.3476744294166565, "learning_rate": 1e-06, "loss": -0.1454, "step": 1251 }, { "clip_ratio/high_max": 0.0034699178504524752, "clip_ratio/high_mean": 0.0012866850302089006, "clip_ratio/low_mean": 0.0007549787342213676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020416637999005616, "epoch": 2.9238845144356955, "grad_norm": 0.3368608057498932, "learning_rate": 1e-06, "loss": -0.1456, "step": 1252 }, { "clip_ratio/high_max": 0.002854582453437615, "clip_ratio/high_mean": 0.0011498095773276873, "clip_ratio/low_mean": 0.000816842652966443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019666521693579853, "completions/clipped_ratio": 0.2589285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 1553.8148193359375, "completions/mean_terminated_length": 665.581298828125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.926217556138816, "grad_norm": 1.5981898307800293, "learning_rate": 1e-06, "loss": -0.1475, "num_tokens": 188034664.0, "reward": 0.4419642984867096, "reward_std": 0.19017165899276733, "rewards/verify_math_reward/mean": 0.4419642984867096, "rewards/verify_math_reward/std": 0.49689781665802, "step": 1253 }, { "clip_ratio/high_max": 0.0038424870690505486, "clip_ratio/high_mean": 0.0015051018926897086, "clip_ratio/low_mean": 0.0009940451673173811, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024991470636450686, "epoch": 2.9285505978419364, "grad_norm": 0.5476817488670349, "learning_rate": 1e-06, "loss": -0.148, "step": 1254 }, { "clip_ratio/high_max": 0.003704722890688572, "clip_ratio/high_mean": 0.0015077460284373956, "clip_ratio/low_mean": 0.001038355914715794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002546101932239253, "epoch": 2.9308836395450566, "grad_norm": 0.3283585011959076, "learning_rate": 1e-06, "loss": -0.1482, "step": 1255 }, { "clip_ratio/high_max": 0.003660922549897805, "clip_ratio/high_mean": 0.0014118404742475832, "clip_ratio/low_mean": 0.0011804327368736267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002592273252957966, "epoch": 2.9332166812481772, "grad_norm": 0.2889481484889984, "learning_rate": 1e-06, "loss": -0.1483, "step": 1256 }, { "clip_ratio/high_max": 0.0026269399240845814, "clip_ratio/high_mean": 0.0011264109125477262, "clip_ratio/low_mean": 0.0004641539844669751, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015905649088381324, "completions/clipped_ratio": 0.2087053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3807.0, "completions/mean_length": 1348.33935546875, "completions/mean_terminated_length": 623.6389770507812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.935549722951298, "grad_norm": 0.3987446129322052, "learning_rate": 1e-06, "loss": -0.1032, "num_tokens": 188571536.0, "reward": 0.5078125, "reward_std": 0.16701938211917877, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1257 }, { "clip_ratio/high_max": 0.00304347783094272, "clip_ratio/high_mean": 0.001191147843201179, "clip_ratio/low_mean": 0.0006482329918071628, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018393808204564266, "epoch": 2.937882764654418, "grad_norm": 0.3016304075717926, "learning_rate": 1e-06, "loss": -0.1034, "step": 1258 }, { "clip_ratio/high_max": 0.002994050861161668, "clip_ratio/high_mean": 0.0011779243286582641, "clip_ratio/low_mean": 0.0007734036080364604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001951327889401, "epoch": 2.9402158063575388, "grad_norm": 0.2555900812149048, "learning_rate": 1e-06, "loss": -0.1036, "step": 1259 }, { "clip_ratio/high_max": 0.0029672253658645786, "clip_ratio/high_mean": 0.0012008029843855184, "clip_ratio/low_mean": 0.0008398569625569507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002040659928752575, "epoch": 2.942548848060659, "grad_norm": 0.35908064246177673, "learning_rate": 1e-06, "loss": -0.1036, "step": 1260 }, { "clip_ratio/high_max": 0.0031358182459371164, "clip_ratio/high_mean": 0.0012795805050700437, "clip_ratio/low_mean": 0.0006781714309909148, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001957751970621757, "completions/clipped_ratio": 0.2087053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2495.0, "completions/mean_length": 1318.00341796875, "completions/mean_terminated_length": 585.3018798828125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.9448818897637796, "grad_norm": 1.2461479902267456, "learning_rate": 1e-06, "loss": -0.0908, "num_tokens": 189081587.0, "reward": 0.5323660969734192, "reward_std": 0.16912692785263062, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1261 }, { "clip_ratio/high_max": 0.0036343886677059345, "clip_ratio/high_mean": 0.001438192786736181, "clip_ratio/low_mean": 0.0008761560156926862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00231434874876868, "epoch": 2.9472149314669, "grad_norm": 0.4860003590583801, "learning_rate": 1e-06, "loss": -0.0911, "step": 1262 }, { "clip_ratio/high_max": 0.003681395472085569, "clip_ratio/high_mean": 0.0014559881819877774, "clip_ratio/low_mean": 0.0009327005082013784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023886886847321875, "epoch": 2.9495479731700205, "grad_norm": 0.4207174777984619, "learning_rate": 1e-06, "loss": -0.0912, "step": 1263 }, { "clip_ratio/high_max": 0.004041636348119937, "clip_ratio/high_mean": 0.001519612498668721, "clip_ratio/low_mean": 0.0011997740311926464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027193865389563143, "epoch": 2.9518810148731407, "grad_norm": 0.504676878452301, "learning_rate": 1e-06, "loss": -0.0914, "step": 1264 }, { "clip_ratio/high_max": 0.0033926431569852866, "clip_ratio/high_mean": 0.0014352911020978354, "clip_ratio/low_mean": 0.0006420723038900178, "clip_ratio/low_min": 1.641281596675981e-05, "clip_ratio/region_mean": 0.002077363409625832, "completions/clipped_ratio": 0.2332589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3836.0, "completions/mean_length": 1477.0235595703125, "completions/mean_terminated_length": 680.275146484375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 2.9542140565762613, "grad_norm": 0.3963419198989868, "learning_rate": 1e-06, "loss": -0.1143, "num_tokens": 189636248.0, "reward": 0.4854910969734192, "reward_std": 0.19422738254070282, "rewards/verify_math_reward/mean": 0.4854910671710968, "rewards/verify_math_reward/std": 0.5000686049461365, "step": 1265 }, { "clip_ratio/high_max": 0.0037605531979352236, "clip_ratio/high_mean": 0.0015490402875002474, "clip_ratio/low_mean": 0.0007817827336111804, "clip_ratio/low_min": 1.2786415936716367e-05, "clip_ratio/region_mean": 0.002330822950170841, "epoch": 2.9565470982793816, "grad_norm": 0.3466018736362457, "learning_rate": 1e-06, "loss": -0.1145, "step": 1266 }, { "clip_ratio/high_max": 0.0035368386015761644, "clip_ratio/high_mean": 0.0016083992486528587, "clip_ratio/low_mean": 0.0009373952161695343, "clip_ratio/low_min": 1.641281596675981e-05, "clip_ratio/region_mean": 0.002545794479374308, "epoch": 2.958880139982502, "grad_norm": 0.4694117605686188, "learning_rate": 1e-06, "loss": -0.1147, "step": 1267 }, { "clip_ratio/high_max": 0.0038306224887492135, "clip_ratio/high_mean": 0.0015597249403072055, "clip_ratio/low_mean": 0.0009886663101497106, "clip_ratio/low_min": 1.641281596675981e-05, "clip_ratio/region_mean": 0.002548391254094895, "epoch": 2.961213181685623, "grad_norm": 0.29548385739326477, "learning_rate": 1e-06, "loss": -0.1148, "step": 1268 }, { "clip_ratio/high_max": 0.0028239434359420557, "clip_ratio/high_mean": 0.0010628133077261737, "clip_ratio/low_mean": 0.0006364957262121607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016993090648611542, "completions/clipped_ratio": 0.1953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 1290.890625, "completions/mean_terminated_length": 610.0388793945312, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.963546223388743, "grad_norm": 0.4561242163181305, "learning_rate": 1e-06, "loss": -0.091, "num_tokens": 190165238.0, "reward": 0.5412946939468384, "reward_std": 0.16810522973537445, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1269 }, { "clip_ratio/high_max": 0.0033380371241946705, "clip_ratio/high_mean": 0.0012396585225360468, "clip_ratio/low_mean": 0.0008403555057157064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002080014040984679, "epoch": 2.9658792650918633, "grad_norm": 0.8402196764945984, "learning_rate": 1e-06, "loss": -0.0911, "step": 1270 }, { "clip_ratio/high_max": 0.003374462183273863, "clip_ratio/high_mean": 0.0012676964470301755, "clip_ratio/low_mean": 0.001015200228721369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022828966975794174, "epoch": 2.968212306794984, "grad_norm": 0.3863134980201721, "learning_rate": 1e-06, "loss": -0.0914, "step": 1271 }, { "clip_ratio/high_max": 0.0029636258477694355, "clip_ratio/high_mean": 0.001092220703867497, "clip_ratio/low_mean": 0.0011317469288769644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002223967618192546, "epoch": 2.9705453484981046, "grad_norm": 0.31542837619781494, "learning_rate": 1e-06, "loss": -0.0914, "step": 1272 }, { "clip_ratio/high_max": 0.0031185169718810357, "clip_ratio/high_mean": 0.0011879071043949807, "clip_ratio/low_mean": 0.0006433326623209723, "clip_ratio/low_min": 3.7005089325248264e-05, "clip_ratio/region_mean": 0.0018312397878617048, "completions/clipped_ratio": 0.1975446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3402.0, "completions/mean_length": 1319.029052734375, "completions/mean_terminated_length": 635.4075317382812, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.972878390201225, "grad_norm": 3.8337574005126953, "learning_rate": 1e-06, "loss": -0.0771, "num_tokens": 190711880.0, "reward": 0.535714328289032, "reward_std": 0.16273610293865204, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1273 }, { "clip_ratio/high_max": 0.0035582603741204366, "clip_ratio/high_mean": 0.0012687833732343279, "clip_ratio/low_mean": 0.0006148775719339028, "clip_ratio/low_min": 1.5363815691671334e-05, "clip_ratio/region_mean": 0.001883660937892273, "epoch": 2.9752114319043454, "grad_norm": 2.476362466812134, "learning_rate": 1e-06, "loss": -0.0772, "step": 1274 }, { "clip_ratio/high_max": 0.003701747198647354, "clip_ratio/high_mean": 0.0013226830524217803, "clip_ratio/low_mean": 0.0007646716958333855, "clip_ratio/low_min": 5.4717618695576675e-05, "clip_ratio/region_mean": 0.002087354805553332, "epoch": 2.9775444736074657, "grad_norm": 0.5153287649154663, "learning_rate": 1e-06, "loss": -0.0774, "step": 1275 }, { "clip_ratio/high_max": 0.003274621965829283, "clip_ratio/high_mean": 0.0012904019531561062, "clip_ratio/low_mean": 0.001012168635497801, "clip_ratio/low_min": 8.264489588327706e-05, "clip_ratio/region_mean": 0.002302570537722204, "epoch": 2.9798775153105863, "grad_norm": 6.223364353179932, "learning_rate": 1e-06, "loss": -0.0771, "step": 1276 }, { "clip_ratio/high_max": 0.004495812056120485, "clip_ratio/high_mean": 0.0017977641764446162, "clip_ratio/low_mean": 0.0007139533545341692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025117175027844496, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3326.0, "completions/mean_length": 1253.3560791015625, "completions/mean_terminated_length": 667.9932861328125, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 2.9822105570137065, "grad_norm": 0.6470964550971985, "learning_rate": 1e-06, "loss": -0.1133, "num_tokens": 191304903.0, "reward": 0.551339328289032, "reward_std": 0.23346063494682312, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1277 }, { "clip_ratio/high_max": 0.004212733081658371, "clip_ratio/high_mean": 0.0019265599803475197, "clip_ratio/low_mean": 0.0009817027457756922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002908262802520767, "epoch": 2.984543598716827, "grad_norm": 2.3688466548919678, "learning_rate": 1e-06, "loss": -0.1135, "step": 1278 }, { "clip_ratio/high_max": 0.004278146618162282, "clip_ratio/high_mean": 0.0017412095694453456, "clip_ratio/low_mean": 0.0009955636160157155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002736773240030743, "epoch": 2.9868766404199474, "grad_norm": 0.4791395962238312, "learning_rate": 1e-06, "loss": -0.1136, "step": 1279 }, { "clip_ratio/high_max": 0.004824887466384098, "clip_ratio/high_mean": 0.0019561336885089986, "clip_ratio/low_mean": 0.0011255320205236785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003081665658100974, "epoch": 2.989209682123068, "grad_norm": 0.4142671525478363, "learning_rate": 1e-06, "loss": -0.1137, "step": 1280 }, { "clip_ratio/high_max": 0.0023622938388143666, "clip_ratio/high_mean": 0.0007891737004683819, "clip_ratio/low_mean": 0.0007215175246528815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001510691232397221, "completions/clipped_ratio": 0.2220982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 1407.5101318359375, "completions/mean_terminated_length": 639.9210815429688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.9915427238261882, "grad_norm": 3.3288042545318604, "learning_rate": 1e-06, "loss": -0.0546, "num_tokens": 191855024.0, "reward": 0.5301339626312256, "reward_std": 0.1300095021724701, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 1281 }, { "clip_ratio/high_max": 0.002428850046271691, "clip_ratio/high_mean": 0.0007916071117506362, "clip_ratio/low_mean": 0.0008628833511465928, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001654490471992176, "epoch": 2.993875765529309, "grad_norm": 0.6150650382041931, "learning_rate": 1e-06, "loss": -0.0548, "step": 1282 }, { "clip_ratio/high_max": 0.002492466075636912, "clip_ratio/high_mean": 0.0008316465336974943, "clip_ratio/low_mean": 0.001032637285788951, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018642838185769506, "epoch": 2.9962088072324295, "grad_norm": 1.9952679872512817, "learning_rate": 1e-06, "loss": -0.0541, "step": 1283 }, { "clip_ratio/high_max": 0.00255737060069805, "clip_ratio/high_mean": 0.0008179828801075928, "clip_ratio/low_mean": 0.0011301353833914618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019481182389426976, "epoch": 2.9985418489355498, "grad_norm": 0.37684494256973267, "learning_rate": 1e-06, "loss": -0.0551, "step": 1284 }, { "clip_ratio/high_max": 0.0029341260160435922, "clip_ratio/high_mean": 0.0010641481621860294, "clip_ratio/low_mean": 0.0007236095134430798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017877576901810244, "completions/clipped_ratio": 0.2779017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3802.0, "completions/mean_length": 1668.9498291015625, "completions/mean_terminated_length": 734.8917846679688, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 3.0023330417031207, "grad_norm": 0.3911517262458801, "learning_rate": 1e-06, "loss": -0.1085, "num_tokens": 192420787.0, "reward": 0.4151785969734192, "reward_std": 0.15898525714874268, "rewards/verify_math_reward/mean": 0.4151785671710968, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1285 }, { "clip_ratio/high_max": 0.0028529496339615434, "clip_ratio/high_mean": 0.001139554602559656, "clip_ratio/low_mean": 0.0008693054760442465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00200886010134127, "epoch": 3.004666083406241, "grad_norm": 0.36910468339920044, "learning_rate": 1e-06, "loss": -0.1086, "step": 1286 }, { "clip_ratio/high_max": 0.003003324512974359, "clip_ratio/high_mean": 0.0010881313883146504, "clip_ratio/low_mean": 0.000911905169232341, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002000036518438719, "epoch": 3.0069991251093615, "grad_norm": 0.3659312427043915, "learning_rate": 1e-06, "loss": -0.1088, "step": 1287 }, { "clip_ratio/high_max": 0.0030101057927822694, "clip_ratio/high_mean": 0.001150330477685202, "clip_ratio/low_mean": 0.0010788765339384554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002229207006166689, "epoch": 3.0093321668124817, "grad_norm": 0.359979510307312, "learning_rate": 1e-06, "loss": -0.1089, "step": 1288 }, { "clip_ratio/high_max": 0.002387522596109193, "clip_ratio/high_mean": 0.0008618655647296691, "clip_ratio/low_mean": 0.0004521233588548057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013139889379090164, "completions/clipped_ratio": 0.2232142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3897.0, "completions/mean_length": 1444.962158203125, "completions/mean_terminated_length": 683.1695556640625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 3.0116652085156024, "grad_norm": 0.37270867824554443, "learning_rate": 1e-06, "loss": -0.1103, "num_tokens": 192978769.0, "reward": 0.5424107313156128, "reward_std": 0.15349668264389038, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 1289 }, { "clip_ratio/high_max": 0.00278236748999916, "clip_ratio/high_mean": 0.0010238155191473197, "clip_ratio/low_mean": 0.0006697156052268838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016935311468841974, "epoch": 3.0139982502187226, "grad_norm": 0.35261374711990356, "learning_rate": 1e-06, "loss": -0.1104, "step": 1290 }, { "clip_ratio/high_max": 0.0025570792367943795, "clip_ratio/high_mean": 0.0009962566327885725, "clip_ratio/low_mean": 0.0005873585182598617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001583615146955708, "epoch": 3.0163312919218432, "grad_norm": 0.30153024196624756, "learning_rate": 1e-06, "loss": -0.1105, "step": 1291 }, { "clip_ratio/high_max": 0.002763312753813807, "clip_ratio/high_mean": 0.0010745744257292245, "clip_ratio/low_mean": 0.0008387730699723761, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019133475616399664, "epoch": 3.0186643336249634, "grad_norm": 0.33618199825286865, "learning_rate": 1e-06, "loss": -0.1106, "step": 1292 }, { "clip_ratio/high_max": 0.0029119778992026113, "clip_ratio/high_mean": 0.0011599048266361933, "clip_ratio/low_mean": 0.0008196167891583173, "clip_ratio/low_min": 3.745472440641606e-05, "clip_ratio/region_mean": 0.001979521613975521, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3663.0, "completions/mean_length": 1293.716552734375, "completions/mean_terminated_length": 651.7667846679688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.020997375328084, "grad_norm": 0.47452282905578613, "learning_rate": 1e-06, "loss": -0.0925, "num_tokens": 193543571.0, "reward": 0.5, "reward_std": 0.18667790293693542, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 1293 }, { "clip_ratio/high_max": 0.0034987436738447286, "clip_ratio/high_mean": 0.0012775198792951414, "clip_ratio/low_mean": 0.0010653285680746194, "clip_ratio/low_min": 5.783488813904114e-05, "clip_ratio/region_mean": 0.002342848434636835, "epoch": 3.0233304170312043, "grad_norm": 2.506450653076172, "learning_rate": 1e-06, "loss": -0.0925, "step": 1294 }, { "clip_ratio/high_max": 0.0029801191776641645, "clip_ratio/high_mean": 0.0011337745818309486, "clip_ratio/low_mean": 0.00117715048691025, "clip_ratio/low_min": 6.72269970891648e-05, "clip_ratio/region_mean": 0.002310925061465241, "epoch": 3.025663458734325, "grad_norm": 0.42971041798591614, "learning_rate": 1e-06, "loss": -0.0927, "step": 1295 }, { "clip_ratio/high_max": 0.0033721197833074257, "clip_ratio/high_mean": 0.0012715794437099248, "clip_ratio/low_mean": 0.0012779779863194562, "clip_ratio/low_min": 6.845593452453613e-05, "clip_ratio/region_mean": 0.0025495573790976778, "epoch": 3.027996500437445, "grad_norm": 0.5183848142623901, "learning_rate": 1e-06, "loss": -0.0928, "step": 1296 }, { "clip_ratio/high_max": 0.002582093635282945, "clip_ratio/high_mean": 0.000998998624709202, "clip_ratio/low_mean": 0.0008174694330591592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018164680841437075, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3809.0, "completions/mean_length": 1285.727783203125, "completions/mean_terminated_length": 641.9478759765625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 3.030329542140566, "grad_norm": 7.648484706878662, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 194102143.0, "reward": 0.5546875, "reward_std": 0.17033015191555023, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1297 }, { "clip_ratio/high_max": 0.0027869905898114666, "clip_ratio/high_mean": 0.001056800927472068, "clip_ratio/low_mean": 0.0008183976069631171, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018751984898699448, "epoch": 3.032662583843686, "grad_norm": 0.6819704174995422, "learning_rate": 1e-06, "loss": -0.0642, "step": 1298 }, { "clip_ratio/high_max": 0.002949386842374224, "clip_ratio/high_mean": 0.0011846597171825124, "clip_ratio/low_mean": 0.0009814565437409328, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002166116264561424, "epoch": 3.0349956255468067, "grad_norm": 0.913485050201416, "learning_rate": 1e-06, "loss": -0.0643, "step": 1299 }, { "clip_ratio/high_max": 0.0027444875886430964, "clip_ratio/high_mean": 0.0010722857896325877, "clip_ratio/low_mean": 0.0011374977730156388, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022097835899330676, "epoch": 3.037328667249927, "grad_norm": 0.28615206480026245, "learning_rate": 1e-06, "loss": -0.0646, "step": 1300 }, { "clip_ratio/high_max": 0.002170353542169323, "clip_ratio/high_mean": 0.0008116324315778911, "clip_ratio/low_mean": 0.0006237638644961407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014353962542372756, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 1179.6473388671875, "completions/mean_terminated_length": 607.279052734375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.0396617089530475, "grad_norm": 0.586928129196167, "learning_rate": 1e-06, "loss": -0.0645, "num_tokens": 194637259.0, "reward": 0.559151828289032, "reward_std": 0.14804832637310028, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1301 }, { "clip_ratio/high_max": 0.0024776921709417365, "clip_ratio/high_mean": 0.000969286016697879, "clip_ratio/low_mean": 0.0007513177370128687, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017206037919095252, "epoch": 3.041994750656168, "grad_norm": 0.29169341921806335, "learning_rate": 1e-06, "loss": -0.0647, "step": 1302 }, { "clip_ratio/high_max": 0.002486988771124743, "clip_ratio/high_mean": 0.0009817201116675278, "clip_ratio/low_mean": 0.0008248983631347073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018066184638882987, "epoch": 3.0443277923592884, "grad_norm": 0.3569551706314087, "learning_rate": 1e-06, "loss": -0.0647, "step": 1303 }, { "clip_ratio/high_max": 0.002440719836158678, "clip_ratio/high_mean": 0.0009145878611889202, "clip_ratio/low_mean": 0.0010206333245150745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019352211675141007, "epoch": 3.046660834062409, "grad_norm": 4.501830577850342, "learning_rate": 1e-06, "loss": -0.0647, "step": 1304 }, { "clip_ratio/high_max": 0.003202307274477789, "clip_ratio/high_mean": 0.0011969815550401108, "clip_ratio/low_mean": 0.0007392058876121155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019361874474270735, "completions/clipped_ratio": 0.2243303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2854.0, "completions/mean_length": 1419.7423095703125, "completions/mean_terminated_length": 645.7453002929688, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 3.0489938757655293, "grad_norm": 1.6234571933746338, "learning_rate": 1e-06, "loss": -0.082, "num_tokens": 195178204.0, "reward": 0.5022321939468384, "reward_std": 0.17731580138206482, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 1305 }, { "clip_ratio/high_max": 0.003333952307002619, "clip_ratio/high_mean": 0.001337966041319305, "clip_ratio/low_mean": 0.0008251814797404222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021631475319736637, "epoch": 3.05132691746865, "grad_norm": 1.6189180612564087, "learning_rate": 1e-06, "loss": -0.0819, "step": 1306 }, { "clip_ratio/high_max": 0.0038742744727642275, "clip_ratio/high_mean": 0.0013881174327252666, "clip_ratio/low_mean": 0.0010010786145357997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023891960372566245, "epoch": 3.05365995917177, "grad_norm": 0.41167744994163513, "learning_rate": 1e-06, "loss": -0.0823, "step": 1307 }, { "clip_ratio/high_max": 0.0041165611037286, "clip_ratio/high_mean": 0.0014948606749385362, "clip_ratio/low_mean": 0.0011751801412174245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026700407979660667, "epoch": 3.055993000874891, "grad_norm": 0.5535644292831421, "learning_rate": 1e-06, "loss": -0.0824, "step": 1308 }, { "clip_ratio/high_max": 0.0034429262741468847, "clip_ratio/high_mean": 0.0011909450295206625, "clip_ratio/low_mean": 0.0006535447446367471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018444898232701235, "completions/clipped_ratio": 0.2433035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3963.0, "completions/mean_length": 1461.59716796875, "completions/mean_terminated_length": 614.5471801757812, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 3.058326042578011, "grad_norm": 0.39447373151779175, "learning_rate": 1e-06, "loss": -0.0791, "num_tokens": 195687619.0, "reward": 0.4821428656578064, "reward_std": 0.15692119300365448, "rewards/verify_math_reward/mean": 0.4821428656578064, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1309 }, { "clip_ratio/high_max": 0.004243436094839126, "clip_ratio/high_mean": 0.001452445543691283, "clip_ratio/low_mean": 0.0008536635250493418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023061090760165825, "epoch": 3.0606590842811316, "grad_norm": 0.45697638392448425, "learning_rate": 1e-06, "loss": -0.0793, "step": 1310 }, { "clip_ratio/high_max": 0.003600805619498715, "clip_ratio/high_mean": 0.0013074753751425305, "clip_ratio/low_mean": 0.000984459627943579, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022919349939911626, "epoch": 3.062992125984252, "grad_norm": 5.094274044036865, "learning_rate": 1e-06, "loss": -0.0791, "step": 1311 }, { "clip_ratio/high_max": 0.0039643659110879526, "clip_ratio/high_mean": 0.00139178016979713, "clip_ratio/low_mean": 0.0010289519032085082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024207320748246275, "epoch": 3.0653251676873725, "grad_norm": 4.300194263458252, "learning_rate": 1e-06, "loss": -0.0792, "step": 1312 }, { "clip_ratio/high_max": 0.003239000419853255, "clip_ratio/high_mean": 0.001341113540547667, "clip_ratio/low_mean": 0.001100850122384145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024419636247330345, "completions/clipped_ratio": 0.2154017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 1327.1451416015625, "completions/mean_terminated_length": 566.9900512695312, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 3.0676582093904927, "grad_norm": 0.6317344903945923, "learning_rate": 1e-06, "loss": -0.0769, "num_tokens": 196177021.0, "reward": 0.5636160969734192, "reward_std": 0.1770554929971695, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1313 }, { "clip_ratio/high_max": 0.003980327142926399, "clip_ratio/high_mean": 0.0015061783269629814, "clip_ratio/low_mean": 0.0012826575111830607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027888358526979573, "epoch": 3.0699912510936134, "grad_norm": 0.5956067442893982, "learning_rate": 1e-06, "loss": -0.077, "step": 1314 }, { "clip_ratio/high_max": 0.003733900135557633, "clip_ratio/high_mean": 0.0015889468140812824, "clip_ratio/low_mean": 0.0014829656211077236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030719124479219317, "epoch": 3.0723242927967336, "grad_norm": 0.42429134249687195, "learning_rate": 1e-06, "loss": -0.0773, "step": 1315 }, { "clip_ratio/high_max": 0.0034938420430989936, "clip_ratio/high_mean": 0.0014516693954647053, "clip_ratio/low_mean": 0.0017957699819817208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032474394829478115, "epoch": 3.0746573344998542, "grad_norm": 0.3038265109062195, "learning_rate": 1e-06, "loss": -0.0775, "step": 1316 }, { "clip_ratio/high_max": 0.0036336439079605043, "clip_ratio/high_mean": 0.0010948284052574309, "clip_ratio/low_mean": 0.0006059797747184348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017008081886160653, "completions/clipped_ratio": 0.2254464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 1465.0692138671875, "completions/mean_terminated_length": 699.2939453125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 3.0769903762029744, "grad_norm": 77.05912780761719, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 196758067.0, "reward": 0.4810267984867096, "reward_std": 0.12576758861541748, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991896748542786, "step": 1317 }, { "clip_ratio/high_max": 0.00409939547535032, "clip_ratio/high_mean": 0.0012227179086039541, "clip_ratio/low_mean": 0.0005543528715179491, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017770708109310362, "epoch": 3.079323417906095, "grad_norm": 0.6013045907020569, "learning_rate": 1e-06, "loss": -0.0359, "step": 1318 }, { "clip_ratio/high_max": 0.0038174314540810883, "clip_ratio/high_mean": 0.0011618587323027896, "clip_ratio/low_mean": 0.0006722839120811841, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001834142611187417, "epoch": 3.0816564596092153, "grad_norm": 6.640134811401367, "learning_rate": 1e-06, "loss": -0.0073, "step": 1319 }, { "clip_ratio/high_max": 0.004250792811944848, "clip_ratio/high_mean": 0.0011968325889029074, "clip_ratio/low_mean": 0.0007120575455701328, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019088901244685985, "epoch": 3.083989501312336, "grad_norm": 0.6660827994346619, "learning_rate": 1e-06, "loss": -0.0361, "step": 1320 }, { "clip_ratio/high_max": 0.0027105453991680406, "clip_ratio/high_mean": 0.0011640797129075509, "clip_ratio/low_mean": 0.000636196518826182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018002762153628282, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 1110.1763916015625, "completions/mean_terminated_length": 630.5880737304688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.0863225430154566, "grad_norm": 0.526246964931488, "learning_rate": 1e-06, "loss": -0.0739, "num_tokens": 197331657.0, "reward": 0.5647321939468384, "reward_std": 0.1803184598684311, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 1321 }, { "clip_ratio/high_max": 0.003639784612460062, "clip_ratio/high_mean": 0.0014035208914719988, "clip_ratio/low_mean": 0.0007869348046369851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021904556415393017, "epoch": 3.088655584718577, "grad_norm": 0.8051074743270874, "learning_rate": 1e-06, "loss": -0.074, "step": 1322 }, { "clip_ratio/high_max": 0.00401924012840027, "clip_ratio/high_mean": 0.0014252144428610336, "clip_ratio/low_mean": 0.0010067234106827527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002431937784422189, "epoch": 3.0909886264216975, "grad_norm": 1.2478593587875366, "learning_rate": 1e-06, "loss": -0.0743, "step": 1323 }, { "clip_ratio/high_max": 0.0038865004025865346, "clip_ratio/high_mean": 0.0014551370732078794, "clip_ratio/low_mean": 0.0011158811812492786, "clip_ratio/low_min": 2.861721623048652e-05, "clip_ratio/region_mean": 0.0025710182744660415, "epoch": 3.0933216681248177, "grad_norm": 0.3759419023990631, "learning_rate": 1e-06, "loss": -0.0744, "step": 1324 }, { "clip_ratio/high_max": 0.0029256817797431722, "clip_ratio/high_mean": 0.0009558317105984315, "clip_ratio/low_mean": 0.0005636254718410783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001519457178801531, "completions/clipped_ratio": 0.1953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 1302.0379638671875, "completions/mean_terminated_length": 623.891845703125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 3.0956547098279383, "grad_norm": 0.5498407483100891, "learning_rate": 1e-06, "loss": -0.0768, "num_tokens": 197868699.0, "reward": 0.527901828289032, "reward_std": 0.1403779834508896, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1325 }, { "clip_ratio/high_max": 0.0028143778145022225, "clip_ratio/high_mean": 0.0009205680416926043, "clip_ratio/low_mean": 0.00075306263352104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016736306351958774, "epoch": 3.0979877515310585, "grad_norm": 0.3391469717025757, "learning_rate": 1e-06, "loss": -0.077, "step": 1326 }, { "clip_ratio/high_max": 0.003042175212613074, "clip_ratio/high_mean": 0.0009711971665637975, "clip_ratio/low_mean": 0.0008422520877502393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001813449249311816, "epoch": 3.100320793234179, "grad_norm": 0.5204840302467346, "learning_rate": 1e-06, "loss": -0.0772, "step": 1327 }, { "clip_ratio/high_max": 0.00311682893516263, "clip_ratio/high_mean": 0.0009919040949171176, "clip_ratio/low_mean": 0.0008912678840715671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018831720008165576, "epoch": 3.1026538349372994, "grad_norm": 0.4225916266441345, "learning_rate": 1e-06, "loss": -0.0771, "step": 1328 }, { "clip_ratio/high_max": 0.0024010761044337414, "clip_ratio/high_mean": 0.0009046482837220537, "clip_ratio/low_mean": 0.0006260396794459666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001530687961349031, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 1177.516845703125, "completions/mean_terminated_length": 571.7937622070312, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 3.10498687664042, "grad_norm": 0.4155738651752472, "learning_rate": 1e-06, "loss": -0.0661, "num_tokens": 198388298.0, "reward": 0.6004464626312256, "reward_std": 0.1431998312473297, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1329 }, { "clip_ratio/high_max": 0.0023531822953373194, "clip_ratio/high_mean": 0.0009863130944722798, "clip_ratio/low_mean": 0.000701033417499275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016873465465323534, "epoch": 3.1073199183435403, "grad_norm": 0.4991619288921356, "learning_rate": 1e-06, "loss": -0.0662, "step": 1330 }, { "clip_ratio/high_max": 0.0028163421593490057, "clip_ratio/high_mean": 0.0009882709582598181, "clip_ratio/low_mean": 0.0009355173870062572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019237883316236548, "epoch": 3.109652960046661, "grad_norm": 0.33833229541778564, "learning_rate": 1e-06, "loss": -0.0664, "step": 1331 }, { "clip_ratio/high_max": 0.0026305283754481934, "clip_ratio/high_mean": 0.0009672248070273781, "clip_ratio/low_mean": 0.0008498723664160934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018170972216466907, "epoch": 3.111986001749781, "grad_norm": 0.3478334844112396, "learning_rate": 1e-06, "loss": -0.0665, "step": 1332 }, { "clip_ratio/high_max": 0.0032606235035927966, "clip_ratio/high_mean": 0.0011810853866336402, "clip_ratio/low_mean": 0.000953174347159802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021342597538023256, "completions/clipped_ratio": 0.2053571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 1373.696533203125, "completions/mean_terminated_length": 670.1798095703125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 3.114319043452902, "grad_norm": 0.4556431174278259, "learning_rate": 1e-06, "loss": -0.0716, "num_tokens": 198966298.0, "reward": 0.4988839626312256, "reward_std": 0.1829138845205307, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1333 }, { "clip_ratio/high_max": 0.0036636417935369536, "clip_ratio/high_mean": 0.0013313579584064428, "clip_ratio/low_mean": 0.0011200340968571254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024513920725439675, "epoch": 3.116652085156022, "grad_norm": 0.8069571852684021, "learning_rate": 1e-06, "loss": -0.0718, "step": 1334 }, { "clip_ratio/high_max": 0.0036812563630519435, "clip_ratio/high_mean": 0.001404527993145166, "clip_ratio/low_mean": 0.0012969714152859524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027014994047931395, "epoch": 3.1189851268591426, "grad_norm": 0.3887385129928589, "learning_rate": 1e-06, "loss": -0.0719, "step": 1335 }, { "clip_ratio/high_max": 0.0035854177112923935, "clip_ratio/high_mean": 0.0013115198380546644, "clip_ratio/low_mean": 0.001449180623239954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002760700401267968, "epoch": 3.121318168562263, "grad_norm": 0.32441362738609314, "learning_rate": 1e-06, "loss": -0.0721, "step": 1336 }, { "clip_ratio/high_max": 0.0023320835607592016, "clip_ratio/high_mean": 0.0007925430090836016, "clip_ratio/low_mean": 0.0007216504782263655, "clip_ratio/low_min": 1.555113158246968e-05, "clip_ratio/region_mean": 0.0015141935036808718, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3436.0, "completions/mean_length": 1113.5982666015625, "completions/mean_terminated_length": 570.6279907226562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.1236512102653835, "grad_norm": 2.1635701656341553, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 199480378.0, "reward": 0.5870535969734192, "reward_std": 0.139706090092659, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1337 }, { "clip_ratio/high_max": 0.0021701776968257036, "clip_ratio/high_mean": 0.0007825951779523166, "clip_ratio/low_mean": 0.0008559379048165283, "clip_ratio/low_min": 1.555113158246968e-05, "clip_ratio/region_mean": 0.0016385330600314774, "epoch": 3.1259842519685037, "grad_norm": 0.5602055191993713, "learning_rate": 1e-06, "loss": -0.0453, "step": 1338 }, { "clip_ratio/high_max": 0.00277847711549839, "clip_ratio/high_mean": 0.00095491306274198, "clip_ratio/low_mean": 0.0008872840726326103, "clip_ratio/low_min": 1.650818740017712e-05, "clip_ratio/region_mean": 0.001842197154473979, "epoch": 3.1283172936716244, "grad_norm": 0.3577958941459656, "learning_rate": 1e-06, "loss": -0.0454, "step": 1339 }, { "clip_ratio/high_max": 0.0027952146192546934, "clip_ratio/high_mean": 0.000937847345994669, "clip_ratio/low_mean": 0.0012064802685927134, "clip_ratio/low_min": 1.555113158246968e-05, "clip_ratio/region_mean": 0.002144327612768393, "epoch": 3.130650335374745, "grad_norm": 0.28495895862579346, "learning_rate": 1e-06, "loss": -0.0458, "step": 1340 }, { "clip_ratio/high_max": 0.0022807037057646085, "clip_ratio/high_mean": 0.0008878827575244941, "clip_ratio/low_mean": 0.0007747445797576802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016626273354631849, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 1309.208740234375, "completions/mean_terminated_length": 680.1792602539062, "completions/min_length": 202.0, "completions/min_terminated_length": 202.0, "epoch": 3.1329833770778652, "grad_norm": 0.5395275354385376, "learning_rate": 1e-06, "loss": -0.0525, "num_tokens": 200067925.0, "reward": 0.4542410969734192, "reward_std": 0.17051561176776886, "rewards/verify_math_reward/mean": 0.4542410671710968, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 1341 }, { "clip_ratio/high_max": 0.0023494239430874586, "clip_ratio/high_mean": 0.0009941224834619788, "clip_ratio/low_mean": 0.0008678885433255346, "clip_ratio/low_min": 1.367016648146091e-05, "clip_ratio/region_mean": 0.0018620109767653048, "epoch": 3.135316418780986, "grad_norm": 0.4506688117980957, "learning_rate": 1e-06, "loss": -0.0525, "step": 1342 }, { "clip_ratio/high_max": 0.002392055273958249, "clip_ratio/high_mean": 0.0009558897254464682, "clip_ratio/low_mean": 0.0010006935117417015, "clip_ratio/low_min": 1.1087458005931694e-05, "clip_ratio/region_mean": 0.001956583248102106, "epoch": 3.137649460484106, "grad_norm": 0.5600183010101318, "learning_rate": 1e-06, "loss": -0.0525, "step": 1343 }, { "clip_ratio/high_max": 0.0025520645940559916, "clip_ratio/high_mean": 0.0010289657911926042, "clip_ratio/low_mean": 0.0011777448562497739, "clip_ratio/low_min": 1.367016648146091e-05, "clip_ratio/region_mean": 0.0022067106765462086, "epoch": 3.1399825021872267, "grad_norm": 0.28029865026474, "learning_rate": 1e-06, "loss": -0.0529, "step": 1344 }, { "clip_ratio/high_max": 0.002642427927639801, "clip_ratio/high_mean": 0.0010306891435902799, "clip_ratio/low_mean": 0.0006044321426088572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016351212980225682, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3342.0, "completions/mean_length": 1236.685302734375, "completions/mean_terminated_length": 615.0950927734375, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 3.142315543890347, "grad_norm": 0.8857288956642151, "learning_rate": 1e-06, "loss": -0.0797, "num_tokens": 200607939.0, "reward": 0.5379464626312256, "reward_std": 0.1614982634782791, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1345 }, { "clip_ratio/high_max": 0.0032171261409530416, "clip_ratio/high_mean": 0.001155863134044921, "clip_ratio/low_mean": 0.0007052513192320475, "clip_ratio/low_min": 1.229346980835544e-05, "clip_ratio/region_mean": 0.0018611144769238308, "epoch": 3.1446485855934676, "grad_norm": 1.2734124660491943, "learning_rate": 1e-06, "loss": -0.0798, "step": 1346 }, { "clip_ratio/high_max": 0.0033663002323010005, "clip_ratio/high_mean": 0.0012549683888209984, "clip_ratio/low_mean": 0.0008860008138071862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002140969198080711, "epoch": 3.146981627296588, "grad_norm": 3.4096999168395996, "learning_rate": 1e-06, "loss": -0.0793, "step": 1347 }, { "clip_ratio/high_max": 0.002615592624351848, "clip_ratio/high_mean": 0.001020362569761346, "clip_ratio/low_mean": 0.000937191080993216, "clip_ratio/low_min": 1.229346980835544e-05, "clip_ratio/region_mean": 0.001957553678948898, "epoch": 3.1493146689997085, "grad_norm": 0.38200393319129944, "learning_rate": 1e-06, "loss": -0.0786, "step": 1348 }, { "clip_ratio/high_max": 0.004811725615581963, "clip_ratio/high_mean": 0.001638601153899799, "clip_ratio/low_mean": 0.0008993576002467307, "clip_ratio/low_min": 4.3523676140466705e-05, "clip_ratio/region_mean": 0.0025379588041687384, "completions/clipped_ratio": 0.2544642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 1591.7645263671875, "completions/mean_terminated_length": 737.0254516601562, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 3.1516477107028287, "grad_norm": 0.5609940886497498, "learning_rate": 1e-06, "loss": -0.0835, "num_tokens": 201214856.0, "reward": 0.4441964626312256, "reward_std": 0.1707095205783844, "rewards/verify_math_reward/mean": 0.4441964328289032, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 1349 }, { "clip_ratio/high_max": 0.004658862904761918, "clip_ratio/high_mean": 0.0016216683598031523, "clip_ratio/low_mean": 0.0010475550161572755, "clip_ratio/low_min": 2.1761838070233352e-05, "clip_ratio/region_mean": 0.0026692233441281132, "epoch": 3.1539807524059493, "grad_norm": 0.5175382494926453, "learning_rate": 1e-06, "loss": -0.0836, "step": 1350 }, { "clip_ratio/high_max": 0.005925904857576825, "clip_ratio/high_mean": 0.0019004394744115416, "clip_ratio/low_mean": 0.0011803701072494732, "clip_ratio/low_min": 4.121784877497703e-05, "clip_ratio/region_mean": 0.0030808095762040466, "epoch": 3.1563137941090695, "grad_norm": 1.168426513671875, "learning_rate": 1e-06, "loss": -0.0838, "step": 1351 }, { "clip_ratio/high_max": 0.005969589663436636, "clip_ratio/high_mean": 0.0018669183573365444, "clip_ratio/low_mean": 0.001317282981290191, "clip_ratio/low_min": 2.1761838070233352e-05, "clip_ratio/region_mean": 0.003184201385010965, "epoch": 3.15864683581219, "grad_norm": 0.8309394717216492, "learning_rate": 1e-06, "loss": -0.0839, "step": 1352 }, { "clip_ratio/high_max": 0.002869917036150582, "clip_ratio/high_mean": 0.0009604938641132321, "clip_ratio/low_mean": 0.0006256687647692161, "clip_ratio/low_min": 1.2047031304973643e-05, "clip_ratio/region_mean": 0.0015861626452533528, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3286.0, "completions/mean_length": 1290.2332763671875, "completions/mean_terminated_length": 666.3042602539062, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.1609798775153104, "grad_norm": 1.5694860219955444, "learning_rate": 1e-06, "loss": -0.0838, "num_tokens": 201795785.0, "reward": 0.5301339626312256, "reward_std": 0.15601007640361786, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1353 }, { "clip_ratio/high_max": 0.0030587673136324156, "clip_ratio/high_mean": 0.0010293627619830659, "clip_ratio/low_mean": 0.0007350237028731499, "clip_ratio/low_min": 2.4094062609947287e-05, "clip_ratio/region_mean": 0.0017643864630372263, "epoch": 3.163312919218431, "grad_norm": 0.36914658546447754, "learning_rate": 1e-06, "loss": -0.0842, "step": 1354 }, { "clip_ratio/high_max": 0.0038592028213315643, "clip_ratio/high_mean": 0.0011869870504597202, "clip_ratio/low_mean": 0.0009460471555939876, "clip_ratio/low_min": 3.6141096643405035e-05, "clip_ratio/region_mean": 0.002133034242433496, "epoch": 3.1656459609215517, "grad_norm": 0.6679443717002869, "learning_rate": 1e-06, "loss": -0.0845, "step": 1355 }, { "clip_ratio/high_max": 0.003286710983957164, "clip_ratio/high_mean": 0.0010715503904066281, "clip_ratio/low_mean": 0.0009874404895526823, "clip_ratio/low_min": 6.871367804706097e-05, "clip_ratio/region_mean": 0.0020589908890542574, "epoch": 3.167979002624672, "grad_norm": 0.560174822807312, "learning_rate": 1e-06, "loss": -0.0845, "step": 1356 }, { "clip_ratio/high_max": 0.0024601607183285523, "clip_ratio/high_mean": 0.0008686525816301582, "clip_ratio/low_mean": 0.0006411555041268002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001509808102127863, "completions/clipped_ratio": 0.2042410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3544.0, "completions/mean_length": 1347.65625, "completions/mean_terminated_length": 642.2608642578125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.1703120443277926, "grad_norm": 0.6964887380599976, "learning_rate": 1e-06, "loss": -0.082, "num_tokens": 202338613.0, "reward": 0.5212053656578064, "reward_std": 0.16104501485824585, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1357 }, { "clip_ratio/high_max": 0.0031116531317820773, "clip_ratio/high_mean": 0.0010755942912510363, "clip_ratio/low_mean": 0.0007964372707647271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018720315129030496, "epoch": 3.1726450860309128, "grad_norm": 0.38094016909599304, "learning_rate": 1e-06, "loss": -0.0822, "step": 1358 }, { "clip_ratio/high_max": 0.0030522368833771907, "clip_ratio/high_mean": 0.0010401869021734456, "clip_ratio/low_mean": 0.0008995835687528597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00193977045637439, "epoch": 3.1749781277340334, "grad_norm": 0.5797291398048401, "learning_rate": 1e-06, "loss": -0.0823, "step": 1359 }, { "clip_ratio/high_max": 0.00328709705354413, "clip_ratio/high_mean": 0.0011368661507731304, "clip_ratio/low_mean": 0.0010334804446756607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021703466190956533, "epoch": 3.1773111694371536, "grad_norm": 0.4949369728565216, "learning_rate": 1e-06, "loss": -0.0824, "step": 1360 }, { "clip_ratio/high_max": 0.0029910024895798415, "clip_ratio/high_mean": 0.0010660776169970632, "clip_ratio/low_mean": 0.0007899036963863182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018559813179308549, "completions/clipped_ratio": 0.1964285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 1355.282470703125, "completions/mean_terminated_length": 685.3291625976562, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 3.1796442111402743, "grad_norm": 0.539795994758606, "learning_rate": 1e-06, "loss": -0.0712, "num_tokens": 202914722.0, "reward": 0.5022321939468384, "reward_std": 0.16288946568965912, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 1361 }, { "clip_ratio/high_max": 0.004078404366737232, "clip_ratio/high_mean": 0.0012560861214296892, "clip_ratio/low_mean": 0.0009283058541313949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021843920039827935, "epoch": 3.1819772528433945, "grad_norm": 0.9447586536407471, "learning_rate": 1e-06, "loss": -0.0714, "step": 1362 }, { "clip_ratio/high_max": 0.003760849234822672, "clip_ratio/high_mean": 0.0013530468349927105, "clip_ratio/low_mean": 0.0011362202567397617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024892671062843874, "epoch": 3.184310294546515, "grad_norm": 0.4439539313316345, "learning_rate": 1e-06, "loss": -0.0717, "step": 1363 }, { "clip_ratio/high_max": 0.0037160386200412177, "clip_ratio/high_mean": 0.0012152508097642567, "clip_ratio/low_mean": 0.0011705264578267816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023857773194322363, "epoch": 3.1866433362496354, "grad_norm": 0.39168334007263184, "learning_rate": 1e-06, "loss": -0.0717, "step": 1364 }, { "clip_ratio/high_max": 0.002782148090773262, "clip_ratio/high_mean": 0.0011425865377532318, "clip_ratio/low_mean": 0.0006377365880325669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017803231385187246, "completions/clipped_ratio": 0.2232142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 1488.3070068359375, "completions/mean_terminated_length": 738.9698486328125, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 3.188976377952756, "grad_norm": 4.965093612670898, "learning_rate": 1e-06, "loss": -0.091, "num_tokens": 203522517.0, "reward": 0.5033482313156128, "reward_std": 0.17618855834007263, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1365 }, { "clip_ratio/high_max": 0.0025798141141422093, "clip_ratio/high_mean": 0.0010454291368660051, "clip_ratio/low_mean": 0.0006525516701003653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016979807951429393, "epoch": 3.1913094196558762, "grad_norm": 0.5176095962524414, "learning_rate": 1e-06, "loss": -0.0913, "step": 1366 }, { "clip_ratio/high_max": 0.0031459720485145226, "clip_ratio/high_mean": 0.0012534310408227611, "clip_ratio/low_mean": 0.0009121228158619488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021655537639162503, "epoch": 3.193642461358997, "grad_norm": 0.3223971128463745, "learning_rate": 1e-06, "loss": -0.0917, "step": 1367 }, { "clip_ratio/high_max": 0.002745188401604537, "clip_ratio/high_mean": 0.0011871217193402117, "clip_ratio/low_mean": 0.0008909531334211351, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020780748745892197, "epoch": 3.195975503062117, "grad_norm": 0.7301528453826904, "learning_rate": 1e-06, "loss": -0.0916, "step": 1368 }, { "clip_ratio/high_max": 0.0024880852579372004, "clip_ratio/high_mean": 0.0010448252487549325, "clip_ratio/low_mean": 0.0005945920593148912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001639417307160329, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 1299.65625, "completions/mean_terminated_length": 663.7753295898438, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 3.1983085447652377, "grad_norm": 0.7929269671440125, "learning_rate": 1e-06, "loss": -0.0707, "num_tokens": 204093177.0, "reward": 0.5323660969734192, "reward_std": 0.17648912966251373, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1369 }, { "clip_ratio/high_max": 0.0027205943479202688, "clip_ratio/high_mean": 0.00117167400821927, "clip_ratio/low_mean": 0.0007481324018954183, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019198064146621618, "epoch": 3.200641586468358, "grad_norm": 2.468611240386963, "learning_rate": 1e-06, "loss": -0.0709, "step": 1370 }, { "clip_ratio/high_max": 0.0024143694463418797, "clip_ratio/high_mean": 0.001115655397370574, "clip_ratio/low_mean": 0.0007409417357848724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018565971404314041, "epoch": 3.2029746281714786, "grad_norm": 4.255047798156738, "learning_rate": 1e-06, "loss": -0.0693, "step": 1371 }, { "clip_ratio/high_max": 0.0023001880035735667, "clip_ratio/high_mean": 0.0009246189038094599, "clip_ratio/low_mean": 0.0008774770540185273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018020959469140507, "epoch": 3.205307669874599, "grad_norm": 3.9571685791015625, "learning_rate": 1e-06, "loss": -0.0708, "step": 1372 }, { "clip_ratio/high_max": 0.0027346681963535957, "clip_ratio/high_mean": 0.0009395974047947675, "clip_ratio/low_mean": 0.0006690344271191861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016086318464658689, "completions/clipped_ratio": 0.2287946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3483.0, "completions/mean_length": 1448.938720703125, "completions/mean_terminated_length": 663.6309204101562, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.2076407115777195, "grad_norm": 8.795405387878418, "learning_rate": 1e-06, "loss": -0.0524, "num_tokens": 204640306.0, "reward": 0.4776785969734192, "reward_std": 0.1588319092988968, "rewards/verify_math_reward/mean": 0.4776785671710968, "rewards/verify_math_reward/std": 0.4997805058956146, "step": 1373 }, { "clip_ratio/high_max": 0.002447092061629519, "clip_ratio/high_mean": 0.0009054910096892854, "clip_ratio/low_mean": 0.0006118716373748612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015173626634350512, "epoch": 3.20997375328084, "grad_norm": 0.5467214584350586, "learning_rate": 1e-06, "loss": -0.0544, "step": 1374 }, { "clip_ratio/high_max": 0.0035162466156180017, "clip_ratio/high_mean": 0.0011321973825033638, "clip_ratio/low_mean": 0.0008281342816189863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019603316541179083, "epoch": 3.2123067949839603, "grad_norm": 0.37862253189086914, "learning_rate": 1e-06, "loss": -0.0546, "step": 1375 }, { "clip_ratio/high_max": 0.003465507543296553, "clip_ratio/high_mean": 0.001109286883547611, "clip_ratio/low_mean": 0.000977206340394332, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002086493208480533, "epoch": 3.214639836687081, "grad_norm": 0.371071457862854, "learning_rate": 1e-06, "loss": -0.0546, "step": 1376 }, { "clip_ratio/high_max": 0.002552015175751876, "clip_ratio/high_mean": 0.0009494055520917755, "clip_ratio/low_mean": 0.0004404852979860152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013898908437113278, "completions/clipped_ratio": 0.25, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 1579.040283203125, "completions/mean_terminated_length": 740.0535888671875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 3.216972878390201, "grad_norm": 2.2727036476135254, "learning_rate": 1e-06, "loss": -0.1024, "num_tokens": 205232406.0, "reward": 0.4352678656578064, "reward_std": 0.15736624598503113, "rewards/verify_math_reward/mean": 0.4352678656578064, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 1377 }, { "clip_ratio/high_max": 0.003130967525066808, "clip_ratio/high_mean": 0.001092183250875678, "clip_ratio/low_mean": 0.0005591121985162317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001651295449846657, "epoch": 3.219305920093322, "grad_norm": 0.6352220773696899, "learning_rate": 1e-06, "loss": -0.1029, "step": 1378 }, { "clip_ratio/high_max": 0.0032222638801613357, "clip_ratio/high_mean": 0.00118035624109325, "clip_ratio/low_mean": 0.0006184545677569986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001798810793843586, "epoch": 3.221638961796442, "grad_norm": 0.2876947522163391, "learning_rate": 1e-06, "loss": -0.1029, "step": 1379 }, { "clip_ratio/high_max": 0.0035058354260399938, "clip_ratio/high_mean": 0.0012087200484529603, "clip_ratio/low_mean": 0.0007120401464817405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019207601799280383, "epoch": 3.2239720034995627, "grad_norm": 0.27707159519195557, "learning_rate": 1e-06, "loss": -0.1031, "step": 1380 }, { "clip_ratio/high_max": 0.0025328827905468643, "clip_ratio/high_mean": 0.0008817203979560873, "clip_ratio/low_mean": 0.0006479492994913016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015296697165467776, "completions/clipped_ratio": 0.1763392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3167.0, "completions/mean_length": 1278.7366943359375, "completions/mean_terminated_length": 675.5826416015625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.226305045202683, "grad_norm": 16.94701385498047, "learning_rate": 1e-06, "loss": -0.0754, "num_tokens": 205814938.0, "reward": 0.5803571939468384, "reward_std": 0.14989416301250458, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 1381 }, { "clip_ratio/high_max": 0.002526170057535637, "clip_ratio/high_mean": 0.0009053959784068866, "clip_ratio/low_mean": 0.0007093360140970617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016147320420714095, "epoch": 3.2286380869058036, "grad_norm": 2.0799663066864014, "learning_rate": 1e-06, "loss": -0.0765, "step": 1382 }, { "clip_ratio/high_max": 0.0025850008241832256, "clip_ratio/high_mean": 0.0008841140734148212, "clip_ratio/low_mean": 0.0008762739005305775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017603879678063095, "epoch": 3.2309711286089238, "grad_norm": 1.213025450706482, "learning_rate": 1e-06, "loss": -0.0765, "step": 1383 }, { "clip_ratio/high_max": 0.0026354247092967853, "clip_ratio/high_mean": 0.0009250428556697443, "clip_ratio/low_mean": 0.0009081499420062755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018331928149564192, "epoch": 3.2333041703120444, "grad_norm": 7.2684736251831055, "learning_rate": 1e-06, "loss": -0.0762, "step": 1384 }, { "clip_ratio/high_max": 0.002342450294236187, "clip_ratio/high_mean": 0.0008767494418862043, "clip_ratio/low_mean": 0.0007095029468473513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015862523760006297, "completions/clipped_ratio": 0.1886160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3447.0, "completions/mean_length": 1326.9029541015625, "completions/mean_terminated_length": 683.1925659179688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.2356372120151646, "grad_norm": 318.26055908203125, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 206402851.0, "reward": 0.4687500298023224, "reward_std": 0.16334481537342072, "rewards/verify_math_reward/mean": 0.46875, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1385 }, { "clip_ratio/high_max": 0.0023308937525143847, "clip_ratio/high_mean": 0.0008574928015150363, "clip_ratio/low_mean": 0.0007647560405530385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016222488266066648, "epoch": 3.2379702537182853, "grad_norm": 0.7182860970497131, "learning_rate": 1e-06, "loss": -0.0576, "step": 1386 }, { "clip_ratio/high_max": 0.002917265665018931, "clip_ratio/high_mean": 0.0010551594004937215, "clip_ratio/low_mean": 0.0007679488262510858, "clip_ratio/low_min": 3.4199725632788613e-05, "clip_ratio/region_mean": 0.001823108224925818, "epoch": 3.2403032954214055, "grad_norm": 1.0052074193954468, "learning_rate": 1e-06, "loss": -0.0585, "step": 1387 }, { "clip_ratio/high_max": 0.0024743064132053405, "clip_ratio/high_mean": 0.0009231535404978786, "clip_ratio/low_mean": 0.001016775515381596, "clip_ratio/low_min": 1.7099862816394307e-05, "clip_ratio/region_mean": 0.0019399290613364428, "epoch": 3.242636337124526, "grad_norm": 0.8025463819503784, "learning_rate": 1e-06, "loss": -0.0585, "step": 1388 }, { "clip_ratio/high_max": 0.0025961357314372435, "clip_ratio/high_mean": 0.0010164854502363596, "clip_ratio/low_mean": 0.0006333926876322948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016498781405971386, "completions/clipped_ratio": 0.1886160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3397.0, "completions/mean_length": 1288.9945068359375, "completions/mean_terminated_length": 636.4718017578125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.2449693788276464, "grad_norm": 4.326656818389893, "learning_rate": 1e-06, "loss": -0.0686, "num_tokens": 206956766.0, "reward": 0.4966517984867096, "reward_std": 0.16953881084918976, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1389 }, { "clip_ratio/high_max": 0.0028496361046563834, "clip_ratio/high_mean": 0.0010434309369884431, "clip_ratio/low_mean": 0.0007520835979448748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017955145885935053, "epoch": 3.247302420530767, "grad_norm": 0.3874962329864502, "learning_rate": 1e-06, "loss": -0.0697, "step": 1390 }, { "clip_ratio/high_max": 0.0027638802712317556, "clip_ratio/high_mean": 0.0010941135369648691, "clip_ratio/low_mean": 0.0009078490002139006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020019625662826, "epoch": 3.249635462233887, "grad_norm": 0.4991978704929352, "learning_rate": 1e-06, "loss": -0.0699, "step": 1391 }, { "clip_ratio/high_max": 0.0025774319074116647, "clip_ratio/high_mean": 0.0010858941423066426, "clip_ratio/low_mean": 0.0010145230535272276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002100417194014881, "epoch": 3.251968503937008, "grad_norm": 0.2960955500602722, "learning_rate": 1e-06, "loss": -0.07, "step": 1392 }, { "clip_ratio/high_max": 0.002783117517537903, "clip_ratio/high_mean": 0.0010812836371769663, "clip_ratio/low_mean": 0.0007540768829130684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001835360482800752, "completions/clipped_ratio": 0.2845982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 1715.9376220703125, "completions/mean_terminated_length": 769.1107788085938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 3.2543015456401285, "grad_norm": 0.45290884375572205, "learning_rate": 1e-06, "loss": -0.0888, "num_tokens": 207544614.0, "reward": 0.369419664144516, "reward_std": 0.16544775664806366, "rewards/verify_math_reward/mean": 0.3694196343421936, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1393 }, { "clip_ratio/high_max": 0.003534470670274459, "clip_ratio/high_mean": 0.0012682851811405271, "clip_ratio/low_mean": 0.000887938283085532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021562235051533207, "epoch": 3.2566345873432487, "grad_norm": 0.8811686038970947, "learning_rate": 1e-06, "loss": -0.0889, "step": 1394 }, { "clip_ratio/high_max": 0.002995120608829893, "clip_ratio/high_mean": 0.0012127757981943432, "clip_ratio/low_mean": 0.0011744229559553787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002387198750511743, "epoch": 3.2589676290463694, "grad_norm": 0.33912193775177, "learning_rate": 1e-06, "loss": -0.0891, "step": 1395 }, { "clip_ratio/high_max": 0.0032918516371864825, "clip_ratio/high_mean": 0.0012751897520502098, "clip_ratio/low_mean": 0.0011152186070830794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023904083209345117, "epoch": 3.2613006707494896, "grad_norm": 0.4432776868343353, "learning_rate": 1e-06, "loss": -0.0892, "step": 1396 }, { "clip_ratio/high_max": 0.0026457774292794056, "clip_ratio/high_mean": 0.000967094534644275, "clip_ratio/low_mean": 0.0006840386668045539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016511332250956912, "completions/clipped_ratio": 0.1752232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 1319.204345703125, "completions/mean_terminated_length": 729.2760009765625, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 3.2636337124526102, "grad_norm": 12.320646286010742, "learning_rate": 1e-06, "loss": -0.0725, "num_tokens": 208176013.0, "reward": 0.4743303656578064, "reward_std": 0.1617611199617386, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 1397 }, { "clip_ratio/high_max": 0.0024350592866539955, "clip_ratio/high_mean": 0.0009481079068791587, "clip_ratio/low_mean": 0.0007949888122311677, "clip_ratio/low_min": 9.806998605199624e-06, "clip_ratio/region_mean": 0.0017430967272957787, "epoch": 3.2659667541557305, "grad_norm": 0.47730156779289246, "learning_rate": 1e-06, "loss": 0.0857, "step": 1398 }, { "clip_ratio/high_max": 0.0026412616498419084, "clip_ratio/high_mean": 0.0009766995444806525, "clip_ratio/low_mean": 0.0009731514455779688, "clip_ratio/low_min": 1.4236901733966079e-05, "clip_ratio/region_mean": 0.001949851033714367, "epoch": 3.268299795858851, "grad_norm": 0.5293362736701965, "learning_rate": 1e-06, "loss": -0.0733, "step": 1399 }, { "clip_ratio/high_max": 0.0028248296366655268, "clip_ratio/high_mean": 0.0009744759336172137, "clip_ratio/low_mean": 0.001041641051415354, "clip_ratio/low_min": 5.884198981220834e-05, "clip_ratio/region_mean": 0.0020161169522907585, "epoch": 3.2706328375619713, "grad_norm": 0.6328209638595581, "learning_rate": 1e-06, "loss": -0.0734, "step": 1400 }, { "clip_ratio/high_max": 0.0021699746139347553, "clip_ratio/high_mean": 0.0007465785147360293, "clip_ratio/low_mean": 0.0005778353183814033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013244137771835085, "completions/clipped_ratio": 0.1975446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 1345.4609375, "completions/mean_terminated_length": 668.3463134765625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.272965879265092, "grad_norm": 3.247349262237549, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 208743210.0, "reward": 0.4642857313156128, "reward_std": 0.12268754839897156, "rewards/verify_math_reward/mean": 0.4642857015132904, "rewards/verify_math_reward/std": 0.4990013837814331, "step": 1401 }, { "clip_ratio/high_max": 0.0023044863482937217, "clip_ratio/high_mean": 0.0008059674655669369, "clip_ratio/low_mean": 0.000640840547021071, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014468080044025555, "epoch": 3.275298920968212, "grad_norm": 1.2734745740890503, "learning_rate": 1e-06, "loss": -0.0569, "step": 1402 }, { "clip_ratio/high_max": 0.00217683797745849, "clip_ratio/high_mean": 0.0007424801751767518, "clip_ratio/low_mean": 0.0007512282509196666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014937084524717648, "epoch": 3.277631962671333, "grad_norm": 0.3829127252101898, "learning_rate": 1e-06, "loss": -0.0571, "step": 1403 }, { "clip_ratio/high_max": 0.0025350040523335338, "clip_ratio/high_mean": 0.0007625247781106737, "clip_ratio/low_mean": 0.0008823660791676957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001644890882744221, "epoch": 3.279965004374453, "grad_norm": 0.24608905613422394, "learning_rate": 1e-06, "loss": -0.0573, "step": 1404 }, { "clip_ratio/high_max": 0.0029620363493449986, "clip_ratio/high_mean": 0.0011208229971089168, "clip_ratio/low_mean": 0.000741040808861726, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018618638277985156, "completions/clipped_ratio": 0.2276785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3990.0, "completions/mean_length": 1583.4085693359375, "completions/mean_terminated_length": 842.7022705078125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 3.2822980460775737, "grad_norm": 0.5190527439117432, "learning_rate": 1e-06, "loss": -0.1067, "num_tokens": 209421008.0, "reward": 0.4531250298023224, "reward_std": 0.19794683158397675, "rewards/verify_math_reward/mean": 0.453125, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1405 }, { "clip_ratio/high_max": 0.002984334285429213, "clip_ratio/high_mean": 0.001200127117044758, "clip_ratio/low_mean": 0.0008893229187378893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020894500266877003, "epoch": 3.284631087780694, "grad_norm": 2.4828343391418457, "learning_rate": 1e-06, "loss": -0.1066, "step": 1406 }, { "clip_ratio/high_max": 0.0029198408738011494, "clip_ratio/high_mean": 0.001155474419647362, "clip_ratio/low_mean": 0.0008952020325523335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002050676426733844, "epoch": 3.2869641294838146, "grad_norm": 6.388377666473389, "learning_rate": 1e-06, "loss": -0.1065, "step": 1407 }, { "clip_ratio/high_max": 0.0029759134849882685, "clip_ratio/high_mean": 0.0011245101941312896, "clip_ratio/low_mean": 0.0010865793665288948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022110894933575764, "epoch": 3.289297171186935, "grad_norm": 0.3573254942893982, "learning_rate": 1e-06, "loss": -0.107, "step": 1408 }, { "clip_ratio/high_max": 0.002647025954502169, "clip_ratio/high_mean": 0.0010254958351652022, "clip_ratio/low_mean": 0.000677963125781389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017034589691320434, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 1305.1607666015625, "completions/mean_terminated_length": 661.1209106445312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.2916302128900554, "grad_norm": 0.5672773122787476, "learning_rate": 1e-06, "loss": -0.0785, "num_tokens": 209995824.0, "reward": 0.527901828289032, "reward_std": 0.15796427428722382, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1409 }, { "clip_ratio/high_max": 0.002972642563690897, "clip_ratio/high_mean": 0.0011128113037557341, "clip_ratio/low_mean": 0.0007964527449075831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019092640977760311, "epoch": 3.2939632545931756, "grad_norm": 1.4203097820281982, "learning_rate": 1e-06, "loss": -0.0784, "step": 1410 }, { "clip_ratio/high_max": 0.003029405095730908, "clip_ratio/high_mean": 0.001124420081396238, "clip_ratio/low_mean": 0.000891536554263439, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002015956648392603, "epoch": 3.2962962962962963, "grad_norm": 0.3065042197704315, "learning_rate": 1e-06, "loss": -0.0787, "step": 1411 }, { "clip_ratio/high_max": 0.0027520588773768395, "clip_ratio/high_mean": 0.001081164844435989, "clip_ratio/low_mean": 0.0010083860852319049, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020895508569083177, "epoch": 3.298629337999417, "grad_norm": 0.32295405864715576, "learning_rate": 1e-06, "loss": -0.0788, "step": 1412 }, { "clip_ratio/high_max": 0.0029114385215507355, "clip_ratio/high_mean": 0.0011068600488215452, "clip_ratio/low_mean": 0.0007234404361042834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018303004726476502, "completions/clipped_ratio": 0.1886160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 1350.8270263671875, "completions/mean_terminated_length": 712.6781616210938, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 3.300962379702537, "grad_norm": 1.7381664514541626, "learning_rate": 1e-06, "loss": -0.0846, "num_tokens": 210604861.0, "reward": 0.494419664144516, "reward_std": 0.16837625205516815, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1413 }, { "clip_ratio/high_max": 0.0027337188221281394, "clip_ratio/high_mean": 0.0011319485001877183, "clip_ratio/low_mean": 0.0007105822305675247, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018425306989229284, "epoch": 3.303295421405658, "grad_norm": 1.8672881126403809, "learning_rate": 1e-06, "loss": -0.0845, "step": 1414 }, { "clip_ratio/high_max": 0.0030002702624187805, "clip_ratio/high_mean": 0.001155874948381097, "clip_ratio/low_mean": 0.0009135879408859182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020694628401543014, "epoch": 3.305628463108778, "grad_norm": 1.1596306562423706, "learning_rate": 1e-06, "loss": -0.0848, "step": 1415 }, { "clip_ratio/high_max": 0.0033347109565511346, "clip_ratio/high_mean": 0.0012146389490226284, "clip_ratio/low_mean": 0.0009481958022661274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021628347894875333, "epoch": 3.3079615048118987, "grad_norm": 0.3548368215560913, "learning_rate": 1e-06, "loss": -0.0849, "step": 1416 }, { "clip_ratio/high_max": 0.002432540561130736, "clip_ratio/high_mean": 0.0008466996696370188, "clip_ratio/low_mean": 0.0005285848505991453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001375284518871922, "completions/clipped_ratio": 0.1796875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3181.0, "completions/mean_length": 1248.8304443359375, "completions/mean_terminated_length": 625.1646118164062, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 3.310294546515019, "grad_norm": 0.5875809788703918, "learning_rate": 1e-06, "loss": -0.0761, "num_tokens": 211141725.0, "reward": 0.5814732313156128, "reward_std": 0.15131452679634094, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1417 }, { "clip_ratio/high_max": 0.0027537934365682304, "clip_ratio/high_mean": 0.001026818130412721, "clip_ratio/low_mean": 0.0007809083763277158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001807726570405066, "epoch": 3.3126275882181395, "grad_norm": 1042825152.0, "learning_rate": 1e-06, "loss": 20942.2344, "step": 1418 }, { "clip_ratio/high_max": 0.0020554180555336643, "clip_ratio/high_mean": 0.0007562755927210674, "clip_ratio/low_mean": 0.0007345620324485935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014908376324456185, "epoch": 3.3149606299212597, "grad_norm": 0.3298655152320862, "learning_rate": 1e-06, "loss": -0.0764, "step": 1419 }, { "clip_ratio/high_max": 0.002568712137872353, "clip_ratio/high_mean": 0.0009551064777042484, "clip_ratio/low_mean": 0.0009328160367658711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018879224808188155, "epoch": 3.3172936716243804, "grad_norm": 0.9999549984931946, "learning_rate": 1e-06, "loss": -0.0763, "step": 1420 }, { "clip_ratio/high_max": 0.003004754580615554, "clip_ratio/high_mean": 0.0011823898457805626, "clip_ratio/low_mean": 0.000845771239255555, "clip_ratio/low_min": 3.580123302526772e-05, "clip_ratio/region_mean": 0.002028161099588033, "completions/clipped_ratio": 0.2064732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 1412.5435791015625, "completions/mean_terminated_length": 714.3164672851562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.3196267133275006, "grad_norm": 0.7149144411087036, "learning_rate": 1e-06, "loss": -0.0864, "num_tokens": 211740364.0, "reward": 0.4955357313156128, "reward_std": 0.18080812692642212, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 1421 }, { "clip_ratio/high_max": 0.0032933949332800694, "clip_ratio/high_mean": 0.0013738610650761984, "clip_ratio/low_mean": 0.0011077059425588232, "clip_ratio/low_min": 5.8520599850453436e-05, "clip_ratio/region_mean": 0.0024815670694806613, "epoch": 3.3219597550306212, "grad_norm": 0.4314590394496918, "learning_rate": 1e-06, "loss": -0.0865, "step": 1422 }, { "clip_ratio/high_max": 0.0034335812233621255, "clip_ratio/high_mean": 0.001283448455069447, "clip_ratio/low_mean": 0.0011185312978341244, "clip_ratio/low_min": 6.422112346626818e-05, "clip_ratio/region_mean": 0.0024019797929213382, "epoch": 3.3242927967337415, "grad_norm": 0.3747115135192871, "learning_rate": 1e-06, "loss": -0.0851, "step": 1423 }, { "clip_ratio/high_max": 0.0036144007317489013, "clip_ratio/high_mean": 0.0013670921543962322, "clip_ratio/low_mean": 0.0013429825885395985, "clip_ratio/low_min": 6.422112346626818e-05, "clip_ratio/region_mean": 0.002710074753849767, "epoch": 3.326625838436862, "grad_norm": 1.0429192781448364, "learning_rate": 1e-06, "loss": -0.0869, "step": 1424 }, { "clip_ratio/high_max": 0.0028102659089199733, "clip_ratio/high_mean": 0.001096544177926262, "clip_ratio/low_mean": 0.000521977398875606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016185215972654987, "completions/clipped_ratio": 0.2087053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 1446.1920166015625, "completions/mean_terminated_length": 747.3004760742188, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.3289588801399823, "grad_norm": 0.5798770785331726, "learning_rate": 1e-06, "loss": -0.0755, "num_tokens": 212367304.0, "reward": 0.4654017984867096, "reward_std": 0.16750933229923248, "rewards/verify_math_reward/mean": 0.4654017984867096, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1425 }, { "clip_ratio/high_max": 0.003237814860767685, "clip_ratio/high_mean": 0.0012351388395472895, "clip_ratio/low_mean": 0.0006693589093629271, "clip_ratio/low_min": 2.2818547222414054e-05, "clip_ratio/region_mean": 0.0019044977816520259, "epoch": 3.331291921843103, "grad_norm": 0.4954063594341278, "learning_rate": 1e-06, "loss": -0.0756, "step": 1426 }, { "clip_ratio/high_max": 0.003399645720492117, "clip_ratio/high_mean": 0.0012396289639582392, "clip_ratio/low_mean": 0.0008192620589397848, "clip_ratio/low_min": 3.829187699011527e-05, "clip_ratio/region_mean": 0.0020588910338119604, "epoch": 3.3336249635462236, "grad_norm": 0.36850064992904663, "learning_rate": 1e-06, "loss": -0.0758, "step": 1427 }, { "clip_ratio/high_max": 0.003157162464049179, "clip_ratio/high_mean": 0.00129393611496198, "clip_ratio/low_mean": 0.0009247879861504771, "clip_ratio/low_min": 2.297512583027128e-05, "clip_ratio/region_mean": 0.0022187241265783086, "epoch": 3.335958005249344, "grad_norm": 0.6415751576423645, "learning_rate": 1e-06, "loss": -0.0759, "step": 1428 }, { "clip_ratio/high_max": 0.0021188479149714112, "clip_ratio/high_mean": 0.000823663915070938, "clip_ratio/low_mean": 0.0008891550005500903, "clip_ratio/low_min": 1.9759721908485517e-05, "clip_ratio/region_mean": 0.0017128188883361872, "completions/clipped_ratio": 0.2142857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 1406.6239013671875, "completions/mean_terminated_length": 673.15771484375, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 3.338291046952464, "grad_norm": 0.609437108039856, "learning_rate": 1e-06, "loss": -0.0365, "num_tokens": 212924975.0, "reward": 0.5078125, "reward_std": 0.16548235714435577, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1429 }, { "clip_ratio/high_max": 0.0026211207077722065, "clip_ratio/high_mean": 0.0009961834475689102, "clip_ratio/low_mean": 0.0010782823383124196, "clip_ratio/low_min": 1.0051463505078573e-05, "clip_ratio/region_mean": 0.0020744658140756655, "epoch": 3.3406240886555847, "grad_norm": 0.7670366168022156, "learning_rate": 1e-06, "loss": -0.0367, "step": 1430 }, { "clip_ratio/high_max": 0.0026753891179396305, "clip_ratio/high_mean": 0.0010044256177934585, "clip_ratio/low_mean": 0.0011204668171558296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021248924313113093, "epoch": 3.3429571303587053, "grad_norm": 0.5424493551254272, "learning_rate": 1e-06, "loss": -0.0369, "step": 1431 }, { "clip_ratio/high_max": 0.002604341963888146, "clip_ratio/high_mean": 0.0009955262885341654, "clip_ratio/low_mean": 0.001375244699374889, "clip_ratio/low_min": 1.0051463505078573e-05, "clip_ratio/region_mean": 0.002370771053392673, "epoch": 3.3452901720618256, "grad_norm": 3.139183521270752, "learning_rate": 1e-06, "loss": -0.0367, "step": 1432 }, { "clip_ratio/high_max": 0.0024746718336245976, "clip_ratio/high_mean": 0.0009540909795759944, "clip_ratio/low_mean": 0.0005936102679697797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015477012420888059, "completions/clipped_ratio": 0.2488839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3571.0, "completions/mean_length": 1591.1251220703125, "completions/mean_terminated_length": 761.1292724609375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 3.347623213764946, "grad_norm": 0.30544647574424744, "learning_rate": 1e-06, "loss": -0.1087, "num_tokens": 213531935.0, "reward": 0.4676339626312256, "reward_std": 0.16555652022361755, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1433 }, { "clip_ratio/high_max": 0.002867212962883059, "clip_ratio/high_mean": 0.0011131716255476931, "clip_ratio/low_mean": 0.0007208121551229851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018339837770326994, "epoch": 3.3499562554680664, "grad_norm": 0.35072028636932373, "learning_rate": 1e-06, "loss": -0.1088, "step": 1434 }, { "clip_ratio/high_max": 0.0025396116470801644, "clip_ratio/high_mean": 0.0010405040247860597, "clip_ratio/low_mean": 0.000765930604757159, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018064346149913035, "epoch": 3.352289297171187, "grad_norm": 0.2634793519973755, "learning_rate": 1e-06, "loss": -0.1089, "step": 1435 }, { "clip_ratio/high_max": 0.002637325807882007, "clip_ratio/high_mean": 0.0010577619777905056, "clip_ratio/low_mean": 0.0009306338924943702, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019883958957507275, "epoch": 3.3546223388743073, "grad_norm": 1.5612716674804688, "learning_rate": 1e-06, "loss": -0.1088, "step": 1436 }, { "clip_ratio/high_max": 0.0026944315686705522, "clip_ratio/high_mean": 0.0010359417083236622, "clip_ratio/low_mean": 0.0006065038796805311, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001642445546167437, "completions/clipped_ratio": 0.2098214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3978.0, "completions/mean_length": 1402.3114013671875, "completions/mean_terminated_length": 687.0381469726562, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 3.356955380577428, "grad_norm": 0.3637184798717499, "learning_rate": 1e-06, "loss": -0.0759, "num_tokens": 214109270.0, "reward": 0.4642857313156128, "reward_std": 0.16270287334918976, "rewards/verify_math_reward/mean": 0.4642857015132904, "rewards/verify_math_reward/std": 0.4990013837814331, "step": 1437 }, { "clip_ratio/high_max": 0.0028838835496571846, "clip_ratio/high_mean": 0.0010895478808379266, "clip_ratio/low_mean": 0.0007404311572827282, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001829979031754192, "epoch": 3.359288422280548, "grad_norm": 0.40199917554855347, "learning_rate": 1e-06, "loss": -0.0759, "step": 1438 }, { "clip_ratio/high_max": 0.0029442552258842625, "clip_ratio/high_mean": 0.0010860856164072175, "clip_ratio/low_mean": 0.0008726650994503871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001958750741323456, "epoch": 3.361621463983669, "grad_norm": 0.4835832417011261, "learning_rate": 1e-06, "loss": -0.076, "step": 1439 }, { "clip_ratio/high_max": 0.0028520785563159734, "clip_ratio/high_mean": 0.0010870507940126117, "clip_ratio/low_mean": 0.0009618273452360881, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020488781228777952, "epoch": 3.363954505686789, "grad_norm": 0.5697627663612366, "learning_rate": 1e-06, "loss": -0.0762, "step": 1440 }, { "clip_ratio/high_max": 0.00308129455515882, "clip_ratio/high_mean": 0.0011796721191785764, "clip_ratio/low_mean": 0.0008069359482760774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001986608054721728, "completions/clipped_ratio": 0.2064732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 1388.95654296875, "completions/mean_terminated_length": 684.5921020507812, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 3.3662875473899097, "grad_norm": 0.4969393312931061, "learning_rate": 1e-06, "loss": -0.0695, "num_tokens": 214696735.0, "reward": 0.4821428656578064, "reward_std": 0.18911786377429962, "rewards/verify_math_reward/mean": 0.4821428656578064, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1441 }, { "clip_ratio/high_max": 0.003547340660588816, "clip_ratio/high_mean": 0.0013604699233837891, "clip_ratio/low_mean": 0.0009230463474523276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002283516267198138, "epoch": 3.36862058909303, "grad_norm": 23.681659698486328, "learning_rate": 1e-06, "loss": -0.0677, "step": 1442 }, { "clip_ratio/high_max": 0.0032434607201139443, "clip_ratio/high_mean": 0.0013388091465458274, "clip_ratio/low_mean": 0.0009992053583118832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023380144921247847, "epoch": 3.3709536307961505, "grad_norm": 0.3873210847377777, "learning_rate": 1e-06, "loss": -0.0699, "step": 1443 }, { "clip_ratio/high_max": 0.003445521491812542, "clip_ratio/high_mean": 0.0013774676372122485, "clip_ratio/low_mean": 0.0012471965401346097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026246641573379748, "epoch": 3.3732866724992707, "grad_norm": 8.211713790893555, "learning_rate": 1e-06, "loss": -0.0698, "step": 1444 }, { "clip_ratio/high_max": 0.0026872949601965956, "clip_ratio/high_mean": 0.0009567583711032057, "clip_ratio/low_mean": 0.0006788166965634446, "clip_ratio/low_min": 2.1701389414374717e-05, "clip_ratio/region_mean": 0.00163557511405088, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 1197.2054443359375, "completions/mean_terminated_length": 660.3915405273438, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 3.3756197142023914, "grad_norm": 3.064420700073242, "learning_rate": 1e-06, "loss": -0.065, "num_tokens": 215282143.0, "reward": 0.5502232313156128, "reward_std": 0.17607727646827698, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1445 }, { "clip_ratio/high_max": 0.0028752601574524306, "clip_ratio/high_mean": 0.0010341202596464427, "clip_ratio/low_mean": 0.0008127501314447727, "clip_ratio/low_min": 1.962015448953025e-05, "clip_ratio/region_mean": 0.0018468703929102048, "epoch": 3.377952755905512, "grad_norm": 0.472523033618927, "learning_rate": 1e-06, "loss": -0.0653, "step": 1446 }, { "clip_ratio/high_max": 0.0035553360867197625, "clip_ratio/high_mean": 0.00119334820919903, "clip_ratio/low_mean": 0.0009925317608576734, "clip_ratio/low_min": 1.029993381962413e-05, "clip_ratio/region_mean": 0.0021858799846086185, "epoch": 3.3802857976086322, "grad_norm": 0.35675838589668274, "learning_rate": 1e-06, "loss": -0.0655, "step": 1447 }, { "clip_ratio/high_max": 0.0035797232703771442, "clip_ratio/high_mean": 0.0012190593024570262, "clip_ratio/low_mean": 0.0012035203271807404, "clip_ratio/low_min": 1.962015448953025e-05, "clip_ratio/region_mean": 0.002422579658741597, "epoch": 3.382618839311753, "grad_norm": 0.830223798751831, "learning_rate": 1e-06, "loss": -0.0657, "step": 1448 }, { "clip_ratio/high_max": 0.002380781259489595, "clip_ratio/high_mean": 0.0008603558733284444, "clip_ratio/low_mean": 0.0006333430010272423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014936988809495233, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3469.0, "completions/mean_length": 1315.9576416015625, "completions/mean_terminated_length": 711.6005859375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.384951881014873, "grad_norm": 1.909867525100708, "learning_rate": 1e-06, "loss": -0.0615, "num_tokens": 215895977.0, "reward": 0.424107164144516, "reward_std": 0.16055577993392944, "rewards/verify_math_reward/mean": 0.4241071343421936, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1449 }, { "clip_ratio/high_max": 0.0022466445625468623, "clip_ratio/high_mean": 0.0008160398369909672, "clip_ratio/low_mean": 0.0007582775360788219, "clip_ratio/low_min": 2.1455543901538476e-05, "clip_ratio/region_mean": 0.001574317371705547, "epoch": 3.3872849227179938, "grad_norm": 1.6478874683380127, "learning_rate": 1e-06, "loss": -0.0615, "step": 1450 }, { "clip_ratio/high_max": 0.0026139259280171245, "clip_ratio/high_mean": 0.0008990621299744816, "clip_ratio/low_mean": 0.0007806779221937177, "clip_ratio/low_min": 1.090560090233339e-05, "clip_ratio/region_mean": 0.0016797400385257788, "epoch": 3.389617964421114, "grad_norm": 0.29945632815361023, "learning_rate": 1e-06, "loss": -0.0619, "step": 1451 }, { "clip_ratio/high_max": 0.002650615948368795, "clip_ratio/high_mean": 0.0009436394821022986, "clip_ratio/low_mean": 0.0008735874598642113, "clip_ratio/low_min": 3.597122122300789e-05, "clip_ratio/region_mean": 0.0018172269483329728, "epoch": 3.3919510061242346, "grad_norm": 0.2675054967403412, "learning_rate": 1e-06, "loss": -0.062, "step": 1452 }, { "clip_ratio/high_max": 0.0022722130597685464, "clip_ratio/high_mean": 0.0007559601763205137, "clip_ratio/low_mean": 0.0007390871451207204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014950473378121387, "completions/clipped_ratio": 0.2154017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 1431.6016845703125, "completions/mean_terminated_length": 700.1237182617188, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 3.394284047827355, "grad_norm": 956409.25, "learning_rate": 1e-06, "loss": 28.5584, "num_tokens": 216486012.0, "reward": 0.4977678656578064, "reward_std": 0.1248244121670723, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 1453 }, { "clip_ratio/high_max": 0.0020125456794630736, "clip_ratio/high_mean": 0.0006794047676521586, "clip_ratio/low_mean": 0.0007644267216164735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014438314901781268, "epoch": 3.3966170895304755, "grad_norm": 1.5537241697311401, "learning_rate": 1e-06, "loss": -0.0334, "step": 1454 }, { "clip_ratio/high_max": 0.0023974402829480823, "clip_ratio/high_mean": 0.0007301205469048, "clip_ratio/low_mean": 0.0010675312519197178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017976518174691591, "epoch": 3.3989501312335957, "grad_norm": 1.2462769746780396, "learning_rate": 1e-06, "loss": -0.0334, "step": 1455 }, { "clip_ratio/high_max": 0.0022511595670948736, "clip_ratio/high_mean": 0.0007526909093940048, "clip_ratio/low_mean": 0.0012235734357091133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019762643751164433, "epoch": 3.4012831729367163, "grad_norm": 5.232125759124756, "learning_rate": 1e-06, "loss": -0.0285, "step": 1456 }, { "clip_ratio/high_max": 0.002477000845829025, "clip_ratio/high_mean": 0.000874106801347807, "clip_ratio/low_mean": 0.0007304537170966796, "clip_ratio/low_min": 1.3394770576269366e-05, "clip_ratio/region_mean": 0.0016045605625549797, "completions/clipped_ratio": 0.1919642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3281.0, "completions/mean_length": 1337.2366943359375, "completions/mean_terminated_length": 681.8397827148438, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 3.4036162146398365, "grad_norm": 0.6610177159309387, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 217064624.0, "reward": 0.5055803656578064, "reward_std": 0.15405867993831635, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1457 }, { "clip_ratio/high_max": 0.0025966803877963684, "clip_ratio/high_mean": 0.0010242606949759647, "clip_ratio/low_mean": 0.0008860125126375351, "clip_ratio/low_min": 4.102111415704712e-05, "clip_ratio/region_mean": 0.0019102732585452031, "epoch": 3.405949256342957, "grad_norm": 0.5430633425712585, "learning_rate": 1e-06, "loss": -0.0412, "step": 1458 }, { "clip_ratio/high_max": 0.0026237162746838294, "clip_ratio/high_mean": 0.0010075290520035196, "clip_ratio/low_mean": 0.0011132642207485333, "clip_ratio/low_min": 3.062974792555906e-05, "clip_ratio/region_mean": 0.0021207932732068002, "epoch": 3.4082822980460774, "grad_norm": 5.0520830154418945, "learning_rate": 1e-06, "loss": -0.0411, "step": 1459 }, { "clip_ratio/high_max": 0.002394500195805449, "clip_ratio/high_mean": 0.0008536154782632366, "clip_ratio/low_mean": 0.0010533288832448306, "clip_ratio/low_min": 3.181233296345454e-05, "clip_ratio/region_mean": 0.0019069443587795831, "epoch": 3.410615339749198, "grad_norm": 0.3326270580291748, "learning_rate": 1e-06, "loss": -0.0414, "step": 1460 }, { "clip_ratio/high_max": 0.0026168275653617457, "clip_ratio/high_mean": 0.001006594537102501, "clip_ratio/low_mean": 0.0007466846982424613, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017532792044221424, "completions/clipped_ratio": 0.1830357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3655.0, "completions/mean_length": 1360.016845703125, "completions/mean_terminated_length": 747.036865234375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 3.4129483814523183, "grad_norm": 0.47114115953445435, "learning_rate": 1e-06, "loss": -0.0724, "num_tokens": 217713455.0, "reward": 0.4464285969734192, "reward_std": 0.18569716811180115, "rewards/verify_math_reward/mean": 0.4464285671710968, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 1461 }, { "clip_ratio/high_max": 0.00259199431820889, "clip_ratio/high_mean": 0.0010181909128732514, "clip_ratio/low_mean": 0.0009105109429583536, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001928701876749983, "epoch": 3.415281423155439, "grad_norm": 0.6667231321334839, "learning_rate": 1e-06, "loss": -0.0724, "step": 1462 }, { "clip_ratio/high_max": 0.0026885937768383883, "clip_ratio/high_mean": 0.0011033072423742851, "clip_ratio/low_mean": 0.0011558408950804733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002259148081066087, "epoch": 3.417614464858559, "grad_norm": 0.4120825529098511, "learning_rate": 1e-06, "loss": -0.0727, "step": 1463 }, { "clip_ratio/high_max": 0.0028971399915462825, "clip_ratio/high_mean": 0.001048358255502535, "clip_ratio/low_mean": 0.0012377822822600137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022861406032461673, "epoch": 3.41994750656168, "grad_norm": 0.44905370473861694, "learning_rate": 1e-06, "loss": -0.0728, "step": 1464 }, { "clip_ratio/high_max": 0.00258273923464003, "clip_ratio/high_mean": 0.0009798725968721556, "clip_ratio/low_mean": 0.0006837506243755342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016636232394375838, "completions/clipped_ratio": 0.2165178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 1433.4967041015625, "completions/mean_terminated_length": 697.7051391601562, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.4222805482648004, "grad_norm": 0.45220309495925903, "learning_rate": 1e-06, "loss": -0.0874, "num_tokens": 218301452.0, "reward": 0.4732142984867096, "reward_std": 0.15473198890686035, "rewards/verify_math_reward/mean": 0.4732142984867096, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1465 }, { "clip_ratio/high_max": 0.0036565654154401273, "clip_ratio/high_mean": 0.0011712867562891915, "clip_ratio/low_mean": 0.0007726553576503647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001943942108482588, "epoch": 3.4246135899679206, "grad_norm": 0.5509898662567139, "learning_rate": 1e-06, "loss": -0.0876, "step": 1466 }, { "clip_ratio/high_max": 0.0031363007074105553, "clip_ratio/high_mean": 0.001104411385313142, "clip_ratio/low_mean": 0.0009734435079735704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002077854958770331, "epoch": 3.4269466316710413, "grad_norm": 0.588639497756958, "learning_rate": 1e-06, "loss": -0.0877, "step": 1467 }, { "clip_ratio/high_max": 0.00314172944490565, "clip_ratio/high_mean": 0.0011032390120817581, "clip_ratio/low_mean": 0.0010251918338326504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021284308822941966, "epoch": 3.4292796733741615, "grad_norm": 0.32244259119033813, "learning_rate": 1e-06, "loss": -0.0878, "step": 1468 }, { "clip_ratio/high_max": 0.002830769430147484, "clip_ratio/high_mean": 0.0010965574747388018, "clip_ratio/low_mean": 0.0006272844429986435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017238419313798659, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3644.0, "completions/mean_length": 1111.8773193359375, "completions/mean_terminated_length": 632.5621337890625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.431612715077282, "grad_norm": 1.060254693031311, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 218873870.0, "reward": 0.613839328289032, "reward_std": 0.157290980219841, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1469 }, { "clip_ratio/high_max": 0.0034705041398410685, "clip_ratio/high_mean": 0.0011885528583661653, "clip_ratio/low_mean": 0.0007707004247095028, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001959253306267783, "epoch": 3.4339457567804024, "grad_norm": 2.7630844116210938, "learning_rate": 1e-06, "loss": -0.0338, "step": 1470 }, { "clip_ratio/high_max": 0.003450339580012951, "clip_ratio/high_mean": 0.001189078586321557, "clip_ratio/low_mean": 0.0008569874607928796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002046066063485341, "epoch": 3.436278798483523, "grad_norm": 0.4458649754524231, "learning_rate": 1e-06, "loss": -0.0357, "step": 1471 }, { "clip_ratio/high_max": 0.0033529688516864553, "clip_ratio/high_mean": 0.0012041284244332928, "clip_ratio/low_mean": 0.0009308319622505223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021349603557609953, "epoch": 3.4386118401866432, "grad_norm": 0.3633105456829071, "learning_rate": 1e-06, "loss": -0.0359, "step": 1472 }, { "clip_ratio/high_max": 0.002687696513021365, "clip_ratio/high_mean": 0.0011311529815429822, "clip_ratio/low_mean": 0.0006261786788854806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001757331658154726, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 1318.208740234375, "completions/mean_terminated_length": 686.5465698242188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.440944881889764, "grad_norm": 0.6490761041641235, "learning_rate": 1e-06, "loss": -0.0679, "num_tokens": 219475401.0, "reward": 0.4888392984867096, "reward_std": 0.16266053915023804, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 1473 }, { "clip_ratio/high_max": 0.0031885477947071195, "clip_ratio/high_mean": 0.0012420486691553378, "clip_ratio/low_mean": 0.0007893611295912706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002031409792834893, "epoch": 3.443277923592884, "grad_norm": 48.72343444824219, "learning_rate": 1e-06, "loss": -0.0638, "step": 1474 }, { "clip_ratio/high_max": 0.0028009132729494013, "clip_ratio/high_mean": 0.001158807110186899, "clip_ratio/low_mean": 0.0007814028949724161, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019402100224397145, "epoch": 3.4456109652960047, "grad_norm": 0.40078863501548767, "learning_rate": 1e-06, "loss": -0.0681, "step": 1475 }, { "clip_ratio/high_max": 0.002803771414619405, "clip_ratio/high_mean": 0.0012079568878107239, "clip_ratio/low_mean": 0.0008747139045226504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020826708278036676, "epoch": 3.447944006999125, "grad_norm": 0.3964890241622925, "learning_rate": 1e-06, "loss": -0.0681, "step": 1476 }, { "clip_ratio/high_max": 0.0022852715555927716, "clip_ratio/high_mean": 0.000826284373033559, "clip_ratio/low_mean": 0.0006967903636905248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015230747412715573, "completions/clipped_ratio": 0.1997767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 1425.680908203125, "completions/mean_terminated_length": 759.0321044921875, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 3.4502770487022456, "grad_norm": 0.4748955965042114, "learning_rate": 1e-06, "loss": -0.0671, "num_tokens": 220105491.0, "reward": 0.4642857313156128, "reward_std": 0.1561630219221115, "rewards/verify_math_reward/mean": 0.4642857015132904, "rewards/verify_math_reward/std": 0.4990013837814331, "step": 1477 }, { "clip_ratio/high_max": 0.0027882210451934952, "clip_ratio/high_mean": 0.0009468478438066086, "clip_ratio/low_mean": 0.0008203340094041778, "clip_ratio/low_min": 9.468262760492507e-06, "clip_ratio/region_mean": 0.001767181896866532, "epoch": 3.452610090405366, "grad_norm": 0.3786201477050781, "learning_rate": 1e-06, "loss": -0.0672, "step": 1478 }, { "clip_ratio/high_max": 0.002548465443396708, "clip_ratio/high_mean": 0.0009504965109954355, "clip_ratio/low_mean": 0.000992875466181431, "clip_ratio/low_min": 9.468262760492507e-06, "clip_ratio/region_mean": 0.0019433720080996864, "epoch": 3.4549431321084865, "grad_norm": 0.5422167778015137, "learning_rate": 1e-06, "loss": -0.0673, "step": 1479 }, { "clip_ratio/high_max": 0.0028620118828257546, "clip_ratio/high_mean": 0.000964341305916605, "clip_ratio/low_mean": 0.001032418227623566, "clip_ratio/low_min": 2.840478737198282e-05, "clip_ratio/region_mean": 0.001996759536268655, "epoch": 3.457276173811607, "grad_norm": 0.7680952548980713, "learning_rate": 1e-06, "loss": -0.0674, "step": 1480 }, { "clip_ratio/high_max": 0.0027107095665996894, "clip_ratio/high_mean": 0.0009120432023337344, "clip_ratio/low_mean": 0.0007062681142997462, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016183113584702369, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3813.0, "completions/mean_length": 1177.4207763671875, "completions/mean_terminated_length": 659.670166015625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.4596092155147273, "grad_norm": 0.9439178705215454, "learning_rate": 1e-06, "loss": -0.0369, "num_tokens": 220692708.0, "reward": 0.5658482313156128, "reward_std": 0.13978207111358643, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1481 }, { "clip_ratio/high_max": 0.003011024818988517, "clip_ratio/high_mean": 0.0009993771600420587, "clip_ratio/low_mean": 0.0008645403013360919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018639174777490553, "epoch": 3.4619422572178475, "grad_norm": 0.31614717841148376, "learning_rate": 1e-06, "loss": -0.0372, "step": 1482 }, { "clip_ratio/high_max": 0.0029101353502483107, "clip_ratio/high_mean": 0.0009461423524044221, "clip_ratio/low_mean": 0.000968854021266452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019149963845848106, "epoch": 3.464275298920968, "grad_norm": 6.578280448913574, "learning_rate": 1e-06, "loss": 1.5906, "step": 1483 }, { "clip_ratio/high_max": 0.003044581706490135, "clip_ratio/high_mean": 0.0009638575593271526, "clip_ratio/low_mean": 0.0010077247407025425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001971582336409483, "epoch": 3.466608340624089, "grad_norm": 13561081.0, "learning_rate": 1e-06, "loss": 619.1266, "step": 1484 }, { "clip_ratio/high_max": 0.003312337721581571, "clip_ratio/high_mean": 0.0011443660805525724, "clip_ratio/low_mean": 0.0009136506814684253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020580167620209977, "completions/clipped_ratio": 0.2098214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3599.0, "completions/mean_length": 1400.92529296875, "completions/mean_terminated_length": 685.2838745117188, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 3.468941382327209, "grad_norm": 0.4256632626056671, "learning_rate": 1e-06, "loss": -0.0685, "num_tokens": 221273881.0, "reward": 0.4698660969734192, "reward_std": 0.17438660562038422, "rewards/verify_math_reward/mean": 0.4698660671710968, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 1485 }, { "clip_ratio/high_max": 0.0030695073583046906, "clip_ratio/high_mean": 0.0011310187637718627, "clip_ratio/low_mean": 0.0010537164944253163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021847352472832426, "epoch": 3.4712744240303297, "grad_norm": 0.5320544838905334, "learning_rate": 1e-06, "loss": -0.0685, "step": 1486 }, { "clip_ratio/high_max": 0.0032982815464492887, "clip_ratio/high_mean": 0.001185273769806372, "clip_ratio/low_mean": 0.0011974889475823147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002382762722845655, "epoch": 3.47360746573345, "grad_norm": 0.45095667243003845, "learning_rate": 1e-06, "loss": -0.0687, "step": 1487 }, { "clip_ratio/high_max": 0.003320811920275446, "clip_ratio/high_mean": 0.0011784013731812593, "clip_ratio/low_mean": 0.0013965167163405567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002574918049504049, "epoch": 3.4759405074365706, "grad_norm": 0.49028441309928894, "learning_rate": 1e-06, "loss": -0.0689, "step": 1488 }, { "clip_ratio/high_max": 0.0024183129935408942, "clip_ratio/high_mean": 0.0007782882494211663, "clip_ratio/low_mean": 0.0005262536933514639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013045419618720189, "completions/clipped_ratio": 0.2020089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3249.0, "completions/mean_length": 1399.8538818359375, "completions/mean_terminated_length": 717.3328857421875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 3.478273549139691, "grad_norm": 0.4511207044124603, "learning_rate": 1e-06, "loss": -0.079, "num_tokens": 221876142.0, "reward": 0.5145089626312256, "reward_std": 0.14286282658576965, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 1489 }, { "clip_ratio/high_max": 0.0028768761694664136, "clip_ratio/high_mean": 0.0008735381798032904, "clip_ratio/low_mean": 0.000723124871001346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015966630162438378, "epoch": 3.4806065908428114, "grad_norm": 0.3104996681213379, "learning_rate": 1e-06, "loss": -0.0794, "step": 1490 }, { "clip_ratio/high_max": 0.002584938731160946, "clip_ratio/high_mean": 0.0008581855618103873, "clip_ratio/low_mean": 0.0007431561061821412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016013416498026345, "epoch": 3.4829396325459316, "grad_norm": 0.26189693808555603, "learning_rate": 1e-06, "loss": -0.0794, "step": 1491 }, { "clip_ratio/high_max": 0.0028155655090813525, "clip_ratio/high_mean": 0.0009479040490987245, "clip_ratio/low_mean": 0.000841173239678028, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001789077268767869, "epoch": 3.4852726742490523, "grad_norm": 0.32348302006721497, "learning_rate": 1e-06, "loss": -0.0795, "step": 1492 }, { "clip_ratio/high_max": 0.002898308441217523, "clip_ratio/high_mean": 0.001088519402401289, "clip_ratio/low_mean": 0.0006667219204246067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017552412900840864, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 1272.36279296875, "completions/mean_terminated_length": 749.4669189453125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.4876057159521725, "grad_norm": 0.894498348236084, "learning_rate": 1e-06, "loss": -0.0611, "num_tokens": 222540323.0, "reward": 0.5133928656578064, "reward_std": 0.1744946837425232, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1493 }, { "clip_ratio/high_max": 0.003328510108985938, "clip_ratio/high_mean": 0.001277792063774541, "clip_ratio/low_mean": 0.0007881313413236057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020659233850892633, "epoch": 3.489938757655293, "grad_norm": 0.42834755778312683, "learning_rate": 1e-06, "loss": -0.0614, "step": 1494 }, { "clip_ratio/high_max": 0.003205296889063902, "clip_ratio/high_mean": 0.0013041844904364552, "clip_ratio/low_mean": 0.0009672907417552778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002271475277666468, "epoch": 3.4922717993584134, "grad_norm": 1.6462644338607788, "learning_rate": 1e-06, "loss": -0.0615, "step": 1495 }, { "clip_ratio/high_max": 0.0032194594386965036, "clip_ratio/high_mean": 0.001212324459629599, "clip_ratio/low_mean": 0.0009864314597507473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021987558939144947, "epoch": 3.494604841061534, "grad_norm": 0.5380589962005615, "learning_rate": 1e-06, "loss": -0.0616, "step": 1496 }, { "clip_ratio/high_max": 0.0030381695687538013, "clip_ratio/high_mean": 0.0009994318970711902, "clip_ratio/low_mean": 0.0005840849717060337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015835168633202557, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4010.0, "completions/mean_length": 1294.9654541015625, "completions/mean_terminated_length": 727.2389526367188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.4969378827646542, "grad_norm": 0.5230567455291748, "learning_rate": 1e-06, "loss": -0.0703, "num_tokens": 223160092.0, "reward": 0.4854910969734192, "reward_std": 0.15026184916496277, "rewards/verify_math_reward/mean": 0.4854910671710968, "rewards/verify_math_reward/std": 0.5000686049461365, "step": 1497 }, { "clip_ratio/high_max": 0.0031072988058440387, "clip_ratio/high_mean": 0.0011033644077542704, "clip_ratio/low_mean": 0.0007342589869949734, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018376233856542967, "epoch": 3.499270924467775, "grad_norm": 0.2986593246459961, "learning_rate": 1e-06, "loss": -0.0705, "step": 1498 }, { "clip_ratio/high_max": 0.00312319298973307, "clip_ratio/high_mean": 0.0010820763291121693, "clip_ratio/low_mean": 0.0007627687209605938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001844845031882869, "epoch": 3.5016039661708955, "grad_norm": 0.6152957677841187, "learning_rate": 1e-06, "loss": -0.0704, "step": 1499 }, { "clip_ratio/high_max": 0.0031634042315999977, "clip_ratio/high_mean": 0.0011441084043326555, "clip_ratio/low_mean": 0.0009495124068052974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020936208093189634, "epoch": 3.5039370078740157, "grad_norm": 0.28919389843940735, "learning_rate": 1e-06, "loss": -0.0707, "step": 1500 }, { "clip_ratio/high_max": 0.0031151902585406788, "clip_ratio/high_mean": 0.0010764049602585146, "clip_ratio/low_mean": 0.0005723506765207276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001648755707719829, "completions/clipped_ratio": 0.2098214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3609.0, "completions/mean_length": 1491.7132568359375, "completions/mean_terminated_length": 800.1793823242188, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.506270049577136, "grad_norm": 0.3735395669937134, "learning_rate": 1e-06, "loss": -0.0764, "num_tokens": 223815035.0, "reward": 0.4620535969734192, "reward_std": 0.1669459044933319, "rewards/verify_math_reward/mean": 0.4620535671710968, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1501 }, { "clip_ratio/high_max": 0.0028125134340371005, "clip_ratio/high_mean": 0.001023933229589602, "clip_ratio/low_mean": 0.0007939422303024912, "clip_ratio/low_min": 2.2112153601483442e-05, "clip_ratio/region_mean": 0.0018178754762629978, "epoch": 3.5086030912802566, "grad_norm": 0.7696577310562134, "learning_rate": 1e-06, "loss": -0.0765, "step": 1502 }, { "clip_ratio/high_max": 0.0029385212474153377, "clip_ratio/high_mean": 0.0010509847234061453, "clip_ratio/low_mean": 0.000723159490917169, "clip_ratio/low_min": 9.432538718101569e-06, "clip_ratio/region_mean": 0.0017741441624821164, "epoch": 3.5109361329833773, "grad_norm": 8.600432395935059, "learning_rate": 1e-06, "loss": -0.0764, "step": 1503 }, { "clip_ratio/high_max": 0.00270221153186867, "clip_ratio/high_mean": 0.0009549498554406455, "clip_ratio/low_mean": 0.000884961355041014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001839911230490543, "epoch": 3.5132691746864975, "grad_norm": 0.31163957715034485, "learning_rate": 1e-06, "loss": -0.0767, "step": 1504 }, { "clip_ratio/high_max": 0.002737242975854315, "clip_ratio/high_mean": 0.001057139450495015, "clip_ratio/low_mean": 0.0007974750178618706, "clip_ratio/low_min": 4.091206574230455e-05, "clip_ratio/region_mean": 0.0018546144965512212, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3920.0, "completions/mean_length": 1331.6864013671875, "completions/mean_terminated_length": 757.9608764648438, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 3.515602216389618, "grad_norm": 0.4876993000507355, "learning_rate": 1e-06, "loss": -0.0619, "num_tokens": 224473738.0, "reward": 0.5133928656578064, "reward_std": 0.15804165601730347, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1505 }, { "clip_ratio/high_max": 0.00283440385828726, "clip_ratio/high_mean": 0.0010691314982977929, "clip_ratio/low_mean": 0.0008496619138895767, "clip_ratio/low_min": 2.72747111011995e-05, "clip_ratio/region_mean": 0.0019187934522051364, "epoch": 3.5179352580927383, "grad_norm": 0.47961196303367615, "learning_rate": 1e-06, "loss": -0.0618, "step": 1506 }, { "clip_ratio/high_max": 0.003138916232273914, "clip_ratio/high_mean": 0.0010530867984925862, "clip_ratio/low_mean": 0.0009815421617531683, "clip_ratio/low_min": 4.091206574230455e-05, "clip_ratio/region_mean": 0.0020346289202279877, "epoch": 3.520268299795859, "grad_norm": 1.3522722721099854, "learning_rate": 1e-06, "loss": -0.0621, "step": 1507 }, { "clip_ratio/high_max": 0.002867015471565537, "clip_ratio/high_mean": 0.0011240880867262604, "clip_ratio/low_mean": 0.0011179630373590044, "clip_ratio/low_min": 5.4549422202399e-05, "clip_ratio/region_mean": 0.0022420510795200244, "epoch": 3.522601341498979, "grad_norm": 0.3971919119358063, "learning_rate": 1e-06, "loss": -0.0623, "step": 1508 }, { "clip_ratio/high_max": 0.0026077960537804756, "clip_ratio/high_mean": 0.0009627149265725166, "clip_ratio/low_mean": 0.0007769200447000912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017396350049239118, "completions/clipped_ratio": 0.1986607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 1392.5179443359375, "completions/mean_terminated_length": 722.2952270507812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 3.5249343832021, "grad_norm": 0.3881731629371643, "learning_rate": 1e-06, "loss": -0.0978, "num_tokens": 225087202.0, "reward": 0.4508928656578064, "reward_std": 0.17092566192150116, "rewards/verify_math_reward/mean": 0.4508928656578064, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1509 }, { "clip_ratio/high_max": 0.0030743950555915944, "clip_ratio/high_mean": 0.0010931126525974832, "clip_ratio/low_mean": 0.000945990052059642, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020391027501318604, "epoch": 3.52726742490522, "grad_norm": 0.5408709049224854, "learning_rate": 1e-06, "loss": -0.0978, "step": 1510 }, { "clip_ratio/high_max": 0.0030111412779660895, "clip_ratio/high_mean": 0.0011243318767810706, "clip_ratio/low_mean": 0.0010243530578009086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002148684929125011, "epoch": 3.5296004666083407, "grad_norm": 0.349931925535202, "learning_rate": 1e-06, "loss": -0.0981, "step": 1511 }, { "clip_ratio/high_max": 0.0030716020555701107, "clip_ratio/high_mean": 0.0010662874101399211, "clip_ratio/low_mean": 0.0011913683847524226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022576558112632483, "epoch": 3.531933508311461, "grad_norm": 0.325325071811676, "learning_rate": 1e-06, "loss": -0.0981, "step": 1512 }, { "clip_ratio/high_max": 0.0028457486259867437, "clip_ratio/high_mean": 0.0012267531928955577, "clip_ratio/low_mean": 0.0008973069670901168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021240602145553567, "completions/clipped_ratio": 0.2053571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 1432.15185546875, "completions/mean_terminated_length": 743.7415771484375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.5342665500145816, "grad_norm": 7.858452796936035, "learning_rate": 1e-06, "loss": -0.0874, "num_tokens": 225701386.0, "reward": 0.5133928656578064, "reward_std": 0.2097053974866867, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1513 }, { "clip_ratio/high_max": 0.0031184791296254843, "clip_ratio/high_mean": 0.001282796267332742, "clip_ratio/low_mean": 0.0008872104917827528, "clip_ratio/low_min": 1.7745598597684875e-05, "clip_ratio/region_mean": 0.0021700067445635796, "epoch": 3.536599591717702, "grad_norm": 0.5600501298904419, "learning_rate": 1e-06, "loss": -0.0877, "step": 1514 }, { "clip_ratio/high_max": 0.003515381846227683, "clip_ratio/high_mean": 0.0015191423954092897, "clip_ratio/low_mean": 0.0010809259838424623, "clip_ratio/low_min": 3.549119719536975e-05, "clip_ratio/region_mean": 0.00260006841563154, "epoch": 3.5389326334208224, "grad_norm": 8932.5478515625, "learning_rate": 1e-06, "loss": 0.6948, "step": 1515 }, { "clip_ratio/high_max": 0.0030107581260381266, "clip_ratio/high_mean": 0.0013098793351673521, "clip_ratio/low_mean": 0.0011069025604228955, "clip_ratio/low_min": 3.549119719536975e-05, "clip_ratio/region_mean": 0.002416781928332057, "epoch": 3.5412656751239426, "grad_norm": 0.4338574707508087, "learning_rate": 1e-06, "loss": -0.088, "step": 1516 }, { "clip_ratio/high_max": 0.003421342058572918, "clip_ratio/high_mean": 0.001179260423668893, "clip_ratio/low_mean": 0.0007395694401566288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019188298501831014, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 1231.68310546875, "completions/mean_terminated_length": 674.0960083007812, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 3.5435987168270633, "grad_norm": 0.8090777397155762, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 226307038.0, "reward": 0.486607164144516, "reward_std": 0.17874684929847717, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1517 }, { "clip_ratio/high_max": 0.004090896516572684, "clip_ratio/high_mean": 0.0014086707415117417, "clip_ratio/low_mean": 0.0009072484681382775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023159191841841675, "epoch": 3.545931758530184, "grad_norm": 0.5385518074035645, "learning_rate": 1e-06, "loss": -0.059, "step": 1518 }, { "clip_ratio/high_max": 0.003995766615844332, "clip_ratio/high_mean": 0.0014106312846706714, "clip_ratio/low_mean": 0.001139258515649999, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025498898976366036, "epoch": 3.548264800233304, "grad_norm": 1.22903311252594, "learning_rate": 1e-06, "loss": -0.059, "step": 1519 }, { "clip_ratio/high_max": 0.004032175405882299, "clip_ratio/high_mean": 0.0013127156198606826, "clip_ratio/low_mean": 0.001218735003931215, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025314506638096645, "epoch": 3.5505978419364244, "grad_norm": 0.43519750237464905, "learning_rate": 1e-06, "loss": -0.0591, "step": 1520 }, { "clip_ratio/high_max": 0.002897451209719293, "clip_ratio/high_mean": 0.0011557502984942403, "clip_ratio/low_mean": 0.0009844902979239123, "clip_ratio/low_min": 1.2269336366443895e-05, "clip_ratio/region_mean": 0.0021402405836852267, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 1251.5748291015625, "completions/mean_terminated_length": 702.384765625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.552930883639545, "grad_norm": 1.4349949359893799, "learning_rate": 1e-06, "loss": -0.0437, "num_tokens": 226917761.0, "reward": 0.5133928656578064, "reward_std": 0.2102285474538803, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1521 }, { "clip_ratio/high_max": 0.0032783313290565275, "clip_ratio/high_mean": 0.0012907157761219423, "clip_ratio/low_mean": 0.0010982252042595064, "clip_ratio/low_min": 2.453867273288779e-05, "clip_ratio/region_mean": 0.0023889410294941626, "epoch": 3.5552639253426657, "grad_norm": 0.7636923789978027, "learning_rate": 1e-06, "loss": -0.0443, "step": 1522 }, { "clip_ratio/high_max": 0.003350207749463152, "clip_ratio/high_mean": 0.0013439085669233464, "clip_ratio/low_mean": 0.0012666268885368481, "clip_ratio/low_min": 1.2269336366443895e-05, "clip_ratio/region_mean": 0.002610535389976576, "epoch": 3.557596967045786, "grad_norm": 3.3632936477661133, "learning_rate": 1e-06, "loss": -0.0441, "step": 1523 }, { "clip_ratio/high_max": 0.003127641604805831, "clip_ratio/high_mean": 0.0012994418993912404, "clip_ratio/low_mean": 0.001317184607614763, "clip_ratio/low_min": 3.680801091832109e-05, "clip_ratio/region_mean": 0.0026166264433413744, "epoch": 3.5599300087489065, "grad_norm": 0.3948858678340912, "learning_rate": 1e-06, "loss": -0.0445, "step": 1524 }, { "clip_ratio/high_max": 0.0034138827104470693, "clip_ratio/high_mean": 0.0011889202887687134, "clip_ratio/low_mean": 0.0010349585936637595, "clip_ratio/low_min": 3.16937112074811e-05, "clip_ratio/region_mean": 0.0022238788806134835, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3956.0, "completions/mean_length": 1237.79248046875, "completions/mean_terminated_length": 708.4946899414062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.5622630504520267, "grad_norm": 2.1125569343566895, "learning_rate": 1e-06, "loss": -0.0689, "num_tokens": 227544215.0, "reward": 0.5011160969734192, "reward_std": 0.18298916518688202, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1525 }, { "clip_ratio/high_max": 0.0033390223543392494, "clip_ratio/high_mean": 0.001238075528817717, "clip_ratio/low_mean": 0.0011863707150041591, "clip_ratio/low_min": 1.2320126188569702e-05, "clip_ratio/region_mean": 0.002424446225631982, "epoch": 3.5645960921551474, "grad_norm": 0.5130938291549683, "learning_rate": 1e-06, "loss": -0.0691, "step": 1526 }, { "clip_ratio/high_max": 0.0036364138140925206, "clip_ratio/high_mean": 0.0013615678617497906, "clip_ratio/low_mean": 0.0013031693815719336, "clip_ratio/low_min": 6.160062912385911e-05, "clip_ratio/region_mean": 0.0026647373379091732, "epoch": 3.5669291338582676, "grad_norm": 57.14775085449219, "learning_rate": 1e-06, "loss": -0.0654, "step": 1527 }, { "clip_ratio/high_max": 0.003419830580241978, "clip_ratio/high_mean": 0.0012362951420072932, "clip_ratio/low_mean": 0.0013484062292263843, "clip_ratio/low_min": 7.39207534934394e-05, "clip_ratio/region_mean": 0.002584701396699529, "epoch": 3.5692621755613883, "grad_norm": 0.47838741540908813, "learning_rate": 1e-06, "loss": -0.0693, "step": 1528 }, { "clip_ratio/high_max": 0.0026106782679562457, "clip_ratio/high_mean": 0.0010008603967435192, "clip_ratio/low_mean": 0.0007162520760175539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001717112485493999, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 1280.8895263671875, "completions/mean_terminated_length": 746.2802124023438, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 3.5715952172645085, "grad_norm": 5.896371841430664, "learning_rate": 1e-06, "loss": -0.0818, "num_tokens": 228202172.0, "reward": 0.478794664144516, "reward_std": 0.17539693415164948, "rewards/verify_math_reward/mean": 0.4787946343421936, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1529 }, { "clip_ratio/high_max": 0.0025759812488104217, "clip_ratio/high_mean": 0.0010808590668602847, "clip_ratio/low_mean": 0.0007940254126879154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001874884452263359, "epoch": 3.573928258967629, "grad_norm": 0.41903266310691833, "learning_rate": 1e-06, "loss": -0.0822, "step": 1530 }, { "clip_ratio/high_max": 0.0028501353444880806, "clip_ratio/high_mean": 0.0011562914623937104, "clip_ratio/low_mean": 0.0009930095093295677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002149301020835992, "epoch": 3.5762613006707493, "grad_norm": 0.4170808494091034, "learning_rate": 1e-06, "loss": -0.0824, "step": 1531 }, { "clip_ratio/high_max": 0.0026631919099600054, "clip_ratio/high_mean": 0.0011319951190671418, "clip_ratio/low_mean": 0.001047439916874282, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002179435068683233, "epoch": 3.57859434237387, "grad_norm": 1.241646647453308, "learning_rate": 1e-06, "loss": -0.0823, "step": 1532 }, { "clip_ratio/high_max": 0.0035390911652939394, "clip_ratio/high_mean": 0.0012610717458301224, "clip_ratio/low_mean": 0.0010240890933346236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002285160866449587, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 1262.7421875, "completions/mean_terminated_length": 688.484619140625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 3.5809273840769906, "grad_norm": 3095.3701171875, "learning_rate": 1e-06, "loss": 0.041, "num_tokens": 228808613.0, "reward": 0.5189732313156128, "reward_std": 0.20741447806358337, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1533 }, { "clip_ratio/high_max": 0.0030412865744438022, "clip_ratio/high_mean": 0.0012339803179202136, "clip_ratio/low_mean": 0.0009791485263122013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002213128813309595, "epoch": 3.583260425780111, "grad_norm": 0.7733643651008606, "learning_rate": 1e-06, "loss": -0.0702, "step": 1534 }, { "clip_ratio/high_max": 0.003405227413168177, "clip_ratio/high_mean": 0.001316345875238767, "clip_ratio/low_mean": 0.0011574533928069286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002473799242579844, "epoch": 3.585593467483231, "grad_norm": 0.4769289791584015, "learning_rate": 1e-06, "loss": -0.0704, "step": 1535 }, { "clip_ratio/high_max": 0.003690178520628251, "clip_ratio/high_mean": 0.0013967662998766173, "clip_ratio/low_mean": 0.001413329977367539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028100962954340503, "epoch": 3.5879265091863517, "grad_norm": 0.6217845678329468, "learning_rate": 1e-06, "loss": -0.0705, "step": 1536 }, { "clip_ratio/high_max": 0.0024841948797984514, "clip_ratio/high_mean": 0.0009156475935014896, "clip_ratio/low_mean": 0.0007382399926427752, "clip_ratio/low_min": 2.4870672859833576e-05, "clip_ratio/region_mean": 0.001653887618886074, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 1330.1351318359375, "completions/mean_terminated_length": 728.8600463867188, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 3.5902595508894724, "grad_norm": 0.4289133846759796, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 229438326.0, "reward": 0.447544664144516, "reward_std": 0.16239909827709198, "rewards/verify_math_reward/mean": 0.4475446343421936, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1537 }, { "clip_ratio/high_max": 0.002801508497213945, "clip_ratio/high_mean": 0.001014276354908361, "clip_ratio/low_mean": 0.0008482819084747462, "clip_ratio/low_min": 2.260397741338238e-05, "clip_ratio/region_mean": 0.0018625582379172556, "epoch": 3.5925925925925926, "grad_norm": 3.2627761363983154, "learning_rate": 1e-06, "loss": -0.0607, "step": 1538 }, { "clip_ratio/high_max": 0.002957598517241422, "clip_ratio/high_mean": 0.0011170707221026532, "clip_ratio/low_mean": 0.0008878489807102596, "clip_ratio/low_min": 1.6391293684137054e-05, "clip_ratio/region_mean": 0.00200491971190786, "epoch": 3.5949256342957128, "grad_norm": 0.37898576259613037, "learning_rate": 1e-06, "loss": -0.0611, "step": 1539 }, { "clip_ratio/high_max": 0.002977936477691401, "clip_ratio/high_mean": 0.001032527305142139, "clip_ratio/low_mean": 0.0010335026736356667, "clip_ratio/low_min": 3.278258736827411e-05, "clip_ratio/region_mean": 0.0020660300288000144, "epoch": 3.5972586759988334, "grad_norm": 0.42756304144859314, "learning_rate": 1e-06, "loss": -0.0611, "step": 1540 }, { "clip_ratio/high_max": 0.0029728418303420767, "clip_ratio/high_mean": 0.0010683575546863722, "clip_ratio/low_mean": 0.0008541029110347154, "clip_ratio/low_min": 6.0931026382604614e-05, "clip_ratio/region_mean": 0.0019224604438932147, "completions/clipped_ratio": 0.1886160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 1393.04248046875, "completions/mean_terminated_length": 764.70703125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 3.599591717701954, "grad_norm": 0.442961722612381, "learning_rate": 1e-06, "loss": -0.0828, "num_tokens": 230077556.0, "reward": 0.4587053656578064, "reward_std": 0.185699000954628, "rewards/verify_math_reward/mean": 0.4587053656578064, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1541 }, { "clip_ratio/high_max": 0.0031576820256304927, "clip_ratio/high_mean": 0.0011831381016236264, "clip_ratio/low_mean": 0.0010687715439416934, "clip_ratio/low_min": 3.0465513191302307e-05, "clip_ratio/region_mean": 0.002251909660117235, "epoch": 3.6019247594050743, "grad_norm": 0.8326497077941895, "learning_rate": 1e-06, "loss": -0.0833, "step": 1542 }, { "clip_ratio/high_max": 0.003396297717699781, "clip_ratio/high_mean": 0.001270233284230926, "clip_ratio/low_mean": 0.0012841820280300453, "clip_ratio/low_min": 7.616378570673987e-05, "clip_ratio/region_mean": 0.0025544152813381515, "epoch": 3.604257801108195, "grad_norm": 6.4878058433532715, "learning_rate": 1e-06, "loss": -0.0813, "step": 1543 }, { "clip_ratio/high_max": 0.0030118692957330495, "clip_ratio/high_mean": 0.0011119058835902251, "clip_ratio/low_mean": 0.0012922900386911351, "clip_ratio/low_min": 0.00010080645006382838, "clip_ratio/region_mean": 0.0024041958968155086, "epoch": 3.606590842811315, "grad_norm": 0.4211568832397461, "learning_rate": 1e-06, "loss": -0.0834, "step": 1544 }, { "clip_ratio/high_max": 0.0027605556024354883, "clip_ratio/high_mean": 0.0010253338114125654, "clip_ratio/low_mean": 0.0007463451511284802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017716789479891304, "completions/clipped_ratio": 0.2176339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 1474.6551513671875, "completions/mean_terminated_length": 745.465087890625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.608923884514436, "grad_norm": 1.0605182647705078, "learning_rate": 1e-06, "loss": -0.0781, "num_tokens": 230690511.0, "reward": 0.4966517984867096, "reward_std": 0.19678498804569244, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1545 }, { "clip_ratio/high_max": 0.0031976953032426536, "clip_ratio/high_mean": 0.0012567810899781762, "clip_ratio/low_mean": 0.0008922163797251415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002148997416952625, "epoch": 3.611256926217556, "grad_norm": 0.43185707926750183, "learning_rate": 1e-06, "loss": -0.0784, "step": 1546 }, { "clip_ratio/high_max": 0.00305928031593794, "clip_ratio/high_mean": 0.0012014708190690726, "clip_ratio/low_mean": 0.0010998166417266475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002301287473528646, "epoch": 3.6135899679206767, "grad_norm": 0.34776821732521057, "learning_rate": 1e-06, "loss": -0.0786, "step": 1547 }, { "clip_ratio/high_max": 0.003200961262336932, "clip_ratio/high_mean": 0.0012388513823680114, "clip_ratio/low_mean": 0.001223630224558292, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002462481592374388, "epoch": 3.615923009623797, "grad_norm": 0.604753851890564, "learning_rate": 1e-06, "loss": -0.0786, "step": 1548 }, { "clip_ratio/high_max": 0.0027924481546506286, "clip_ratio/high_mean": 0.0010131002381967846, "clip_ratio/low_mean": 0.0008626430744698155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018757433499558829, "completions/clipped_ratio": 0.2042410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 1405.4476318359375, "completions/mean_terminated_length": 714.885009765625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.6182560513269175, "grad_norm": 0.5342496037483215, "learning_rate": 1e-06, "loss": -0.0819, "num_tokens": 231289560.0, "reward": 0.4274553656578064, "reward_std": 0.1795709878206253, "rewards/verify_math_reward/mean": 0.4274553656578064, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1549 }, { "clip_ratio/high_max": 0.0027119314545416273, "clip_ratio/high_mean": 0.0010870440582948504, "clip_ratio/low_mean": 0.0010381009942648234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002125145103491377, "epoch": 3.6205890930300377, "grad_norm": 0.47871243953704834, "learning_rate": 1e-06, "loss": -0.0818, "step": 1550 }, { "clip_ratio/high_max": 0.0028433014813344926, "clip_ratio/high_mean": 0.0011198430984222796, "clip_ratio/low_mean": 0.0012948570001753978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024147000804077834, "epoch": 3.6229221347331584, "grad_norm": 0.7384235262870789, "learning_rate": 1e-06, "loss": -0.082, "step": 1551 }, { "clip_ratio/high_max": 0.003031973865290638, "clip_ratio/high_mean": 0.0010987500281771645, "clip_ratio/low_mean": 0.0013843205524608493, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002483070617017802, "epoch": 3.625255176436279, "grad_norm": 2.467336416244507, "learning_rate": 1e-06, "loss": -0.082, "step": 1552 }, { "clip_ratio/high_max": 0.002577120583737269, "clip_ratio/high_mean": 0.0009612277408450609, "clip_ratio/low_mean": 0.0010917293329839595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002052957072010031, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 1122.7421875, "completions/mean_terminated_length": 662.9600219726562, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.6275882181393992, "grad_norm": 0.8863140940666199, "learning_rate": 1e-06, "loss": -0.0348, "num_tokens": 231893337.0, "reward": 0.5401785969734192, "reward_std": 0.18588374555110931, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 1553 }, { "clip_ratio/high_max": 0.0027001031194231473, "clip_ratio/high_mean": 0.0010824640139617259, "clip_ratio/low_mean": 0.0012732991217490053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002355763102968922, "epoch": 3.6299212598425195, "grad_norm": 56.1252555847168, "learning_rate": 1e-06, "loss": -0.0342, "step": 1554 }, { "clip_ratio/high_max": 0.0025560386347933672, "clip_ratio/high_mean": 0.0010196384828304872, "clip_ratio/low_mean": 0.0013952662429801421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002414904738543555, "epoch": 3.63225430154564, "grad_norm": 0.6821632385253906, "learning_rate": 1e-06, "loss": -0.0351, "step": 1555 }, { "clip_ratio/high_max": 0.0025174570255330764, "clip_ratio/high_mean": 0.0009603287162462948, "clip_ratio/low_mean": 0.0014935802482796134, "clip_ratio/low_min": 1.0359688531025313e-05, "clip_ratio/region_mean": 0.002453909008181654, "epoch": 3.6345873432487608, "grad_norm": 6.785217761993408, "learning_rate": 1e-06, "loss": -0.0346, "step": 1556 }, { "clip_ratio/high_max": 0.0025922954009729438, "clip_ratio/high_mean": 0.001008968916721642, "clip_ratio/low_mean": 0.0009384104623677558, "clip_ratio/low_min": 3.969934186898172e-05, "clip_ratio/region_mean": 0.0019473793763609137, "completions/clipped_ratio": 0.1941964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3564.0, "completions/mean_length": 1455.138427734375, "completions/mean_terminated_length": 818.6980590820312, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 3.636920384951881, "grad_norm": 2.0183308124542236, "learning_rate": 1e-06, "loss": -0.0507, "num_tokens": 232564669.0, "reward": 0.4129464328289032, "reward_std": 0.200994074344635, "rewards/verify_math_reward/mean": 0.4129464328289032, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1557 }, { "clip_ratio/high_max": 0.002620274965011049, "clip_ratio/high_mean": 0.0010337800667912234, "clip_ratio/low_mean": 0.0011270686190982815, "clip_ratio/low_min": 3.7993919249856845e-05, "clip_ratio/region_mean": 0.002160848722269293, "epoch": 3.6392534266550016, "grad_norm": 0.6128548979759216, "learning_rate": 1e-06, "loss": -0.0508, "step": 1558 }, { "clip_ratio/high_max": 0.0031012719482532702, "clip_ratio/high_mean": 0.0011158364577568136, "clip_ratio/low_mean": 0.0012240526975801913, "clip_ratio/low_min": 5.830223744851537e-05, "clip_ratio/region_mean": 0.0023398892299155705, "epoch": 3.641586468358122, "grad_norm": 0.4138473570346832, "learning_rate": 1e-06, "loss": -0.051, "step": 1559 }, { "clip_ratio/high_max": 0.0030913962109480053, "clip_ratio/high_mean": 0.0011918995587620884, "clip_ratio/low_mean": 0.0014303800562629476, "clip_ratio/low_min": 4.664179141400382e-05, "clip_ratio/region_mean": 0.0026222795277135447, "epoch": 3.6439195100612425, "grad_norm": 0.7010294795036316, "learning_rate": 1e-06, "loss": -0.0511, "step": 1560 }, { "clip_ratio/high_max": 0.00255492686846992, "clip_ratio/high_mean": 0.0008330566670338158, "clip_ratio/low_mean": 0.0008587170314058312, "clip_ratio/low_min": 2.7660986233968288e-05, "clip_ratio/region_mean": 0.0016917736793402582, "completions/clipped_ratio": 0.1830357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 1350.796875, "completions/mean_terminated_length": 735.7513427734375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.6462525517643627, "grad_norm": 1.2315235137939453, "learning_rate": 1e-06, "loss": -0.0582, "num_tokens": 233189783.0, "reward": 0.4743303656578064, "reward_std": 0.15075181424617767, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 1561 }, { "clip_ratio/high_max": 0.0029466612832038663, "clip_ratio/high_mean": 0.0009410417624167167, "clip_ratio/low_mean": 0.0011324606311973184, "clip_ratio/low_min": 4.773421460413374e-05, "clip_ratio/region_mean": 0.0020735024227178656, "epoch": 3.6485855934674833, "grad_norm": 0.417816162109375, "learning_rate": 1e-06, "loss": -0.0582, "step": 1562 }, { "clip_ratio/high_max": 0.003680033565615304, "clip_ratio/high_mean": 0.001069348427336081, "clip_ratio/low_mean": 0.0012123541164328344, "clip_ratio/low_min": 4.767353311763145e-05, "clip_ratio/region_mean": 0.0022817025528638624, "epoch": 3.6509186351706036, "grad_norm": 0.6507503986358643, "learning_rate": 1e-06, "loss": -0.0576, "step": 1563 }, { "clip_ratio/high_max": 0.002848410036676796, "clip_ratio/high_mean": 0.000980542242359661, "clip_ratio/low_mean": 0.0013689965271623805, "clip_ratio/low_min": 6.354047218337655e-05, "clip_ratio/region_mean": 0.002349538801354356, "epoch": 3.653251676873724, "grad_norm": 0.3610619306564331, "learning_rate": 1e-06, "loss": -0.0586, "step": 1564 }, { "clip_ratio/high_max": 0.003207952744560316, "clip_ratio/high_mean": 0.0010759302858787123, "clip_ratio/low_mean": 0.0005850656671100296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016609959493507631, "completions/clipped_ratio": 0.2154017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3175.0, "completions/mean_length": 1431.5592041015625, "completions/mean_terminated_length": 700.0697021484375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.6555847185768444, "grad_norm": 0.8251699209213257, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 233780916.0, "reward": 0.4765625298023224, "reward_std": 0.1395556628704071, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1565 }, { "clip_ratio/high_max": 0.0037720408872701228, "clip_ratio/high_mean": 0.0012003518422716297, "clip_ratio/low_mean": 0.0007662143498237128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019665661893668585, "epoch": 3.657917760279965, "grad_norm": 0.8552809357643127, "learning_rate": 1e-06, "loss": -0.075, "step": 1566 }, { "clip_ratio/high_max": 0.0034726571902865544, "clip_ratio/high_mean": 0.0012743781462631887, "clip_ratio/low_mean": 0.0008400165152124828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021143947160453536, "epoch": 3.6602508019830857, "grad_norm": 0.3077991306781769, "learning_rate": 1e-06, "loss": -0.075, "step": 1567 }, { "clip_ratio/high_max": 0.0037178348429733887, "clip_ratio/high_mean": 0.0011882230428454932, "clip_ratio/low_mean": 0.0009503063356532948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002138529423973523, "epoch": 3.662583843686206, "grad_norm": 0.3889603912830353, "learning_rate": 1e-06, "loss": -0.0752, "step": 1568 }, { "clip_ratio/high_max": 0.0032760338508524, "clip_ratio/high_mean": 0.0011718711139110383, "clip_ratio/low_mean": 0.0004590770445247472, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001630948139791144, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3227.0, "completions/mean_length": 1290.712158203125, "completions/mean_terminated_length": 713.0416870117188, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 3.664916885389326, "grad_norm": 0.8128601312637329, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 234405890.0, "reward": 0.527901828289032, "reward_std": 0.16138990223407745, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1569 }, { "clip_ratio/high_max": 0.0030437940076808445, "clip_ratio/high_mean": 0.0011659772135317326, "clip_ratio/low_mean": 0.000590620807997766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017565980160725303, "epoch": 3.667249927092447, "grad_norm": 1.1310914754867554, "learning_rate": 1e-06, "loss": -0.0645, "step": 1570 }, { "clip_ratio/high_max": 0.003156876911816653, "clip_ratio/high_mean": 0.0012087565701222047, "clip_ratio/low_mean": 0.0007105054655767162, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019192620457033627, "epoch": 3.6695829687955674, "grad_norm": 0.2805728316307068, "learning_rate": 1e-06, "loss": -0.065, "step": 1571 }, { "clip_ratio/high_max": 0.0032013620802899823, "clip_ratio/high_mean": 0.0011959000457864022, "clip_ratio/low_mean": 0.0008317589372381917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002027658985753078, "epoch": 3.6719160104986877, "grad_norm": 2.379966974258423, "learning_rate": 1e-06, "loss": -0.0649, "step": 1572 }, { "clip_ratio/high_max": 0.002746791527897585, "clip_ratio/high_mean": 0.0010340511398680974, "clip_ratio/low_mean": 0.000980835327936802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002014886442339048, "completions/clipped_ratio": 0.2377232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 1547.4051513671875, "completions/mean_terminated_length": 752.6017456054688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.674249052201808, "grad_norm": 5.106496810913086, "learning_rate": 1e-06, "loss": -0.0952, "num_tokens": 235012445.0, "reward": 0.4386160969734192, "reward_std": 0.19685277342796326, "rewards/verify_math_reward/mean": 0.4386160671710968, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1573 }, { "clip_ratio/high_max": 0.002891777599870693, "clip_ratio/high_mean": 0.0011441004717198666, "clip_ratio/low_mean": 0.001232229344168445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023763298595440574, "epoch": 3.6765820939049285, "grad_norm": 4.61195182800293, "learning_rate": 1e-06, "loss": -0.0959, "step": 1574 }, { "clip_ratio/high_max": 0.003076182685617823, "clip_ratio/high_mean": 0.0011772707584896125, "clip_ratio/low_mean": 0.0012325877032708377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002409858425380662, "epoch": 3.678915135608049, "grad_norm": 0.8923947215080261, "learning_rate": 1e-06, "loss": -0.0961, "step": 1575 }, { "clip_ratio/high_max": 0.003241285216063261, "clip_ratio/high_mean": 0.0012853145162807778, "clip_ratio/low_mean": 0.0014559618975908961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027412763811298646, "epoch": 3.6812481773111694, "grad_norm": 0.9136856198310852, "learning_rate": 1e-06, "loss": -0.0962, "step": 1576 }, { "clip_ratio/high_max": 0.0022939981245144736, "clip_ratio/high_mean": 0.0007812910052962252, "clip_ratio/low_mean": 0.0005516359997272957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013329269786481746, "completions/clipped_ratio": 0.21875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3623.0, "completions/mean_length": 1441.091552734375, "completions/mean_terminated_length": 697.7171630859375, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 3.68358121901429, "grad_norm": 0.8794479370117188, "learning_rate": 1e-06, "loss": -0.0585, "num_tokens": 235585071.0, "reward": 0.4720982313156128, "reward_std": 0.14180903136730194, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1577 }, { "clip_ratio/high_max": 0.00228647825497319, "clip_ratio/high_mean": 0.0007381333352896036, "clip_ratio/low_mean": 0.0006543235367644229, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013924568593211006, "epoch": 3.6859142607174102, "grad_norm": 0.3076462149620056, "learning_rate": 1e-06, "loss": -0.0587, "step": 1578 }, { "clip_ratio/high_max": 0.0028368893181323074, "clip_ratio/high_mean": 0.0009305407638748875, "clip_ratio/low_mean": 0.0007251206989167258, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016556614500586875, "epoch": 3.688247302420531, "grad_norm": 0.4042733907699585, "learning_rate": 1e-06, "loss": -0.0588, "step": 1579 }, { "clip_ratio/high_max": 0.002328702299564611, "clip_ratio/high_mean": 0.0007655315057490952, "clip_ratio/low_mean": 0.000883278216861072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016488097389810719, "epoch": 3.690580344123651, "grad_norm": 627.02197265625, "learning_rate": 1e-06, "loss": -0.0418, "step": 1580 }, { "clip_ratio/high_max": 0.0032203914670390077, "clip_ratio/high_mean": 0.0011483033249533037, "clip_ratio/low_mean": 0.0009000349369898686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020483382686506957, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 1273.040283203125, "completions/mean_terminated_length": 682.54248046875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 3.6929133858267718, "grad_norm": 4.658331871032715, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 236179779.0, "reward": 0.5457589626312256, "reward_std": 0.19820715487003326, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1581 }, { "clip_ratio/high_max": 0.0032323574559995905, "clip_ratio/high_mean": 0.0012748992667184211, "clip_ratio/low_mean": 0.0009165804112853948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021914796307100914, "epoch": 3.695246427529892, "grad_norm": 0.7617430686950684, "learning_rate": 1e-06, "loss": -0.0667, "step": 1582 }, { "clip_ratio/high_max": 0.0036377205105964094, "clip_ratio/high_mean": 0.0014019586669746786, "clip_ratio/low_mean": 0.0010856530011551513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024876116367522627, "epoch": 3.6975794692330126, "grad_norm": 0.842507004737854, "learning_rate": 1e-06, "loss": -0.0667, "step": 1583 }, { "clip_ratio/high_max": 0.003589871608710382, "clip_ratio/high_mean": 0.0013796624989481643, "clip_ratio/low_mean": 0.0013332489627373434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027129114605486393, "epoch": 3.699912510936133, "grad_norm": 0.48713672161102295, "learning_rate": 1e-06, "loss": -0.0672, "step": 1584 }, { "clip_ratio/high_max": 0.002979301876621321, "clip_ratio/high_mean": 0.0011884826890309341, "clip_ratio/low_mean": 0.0007776773327350384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001966160001757089, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3168.0, "completions/mean_length": 1281.4921875, "completions/mean_terminated_length": 641.4808349609375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.7022455526392535, "grad_norm": 2.120486259460449, "learning_rate": 1e-06, "loss": -0.0377, "num_tokens": 236734420.0, "reward": 0.5223214626312256, "reward_std": 0.18163186311721802, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1585 }, { "clip_ratio/high_max": 0.0028854257543571293, "clip_ratio/high_mean": 0.0012765987521561328, "clip_ratio/low_mean": 0.0009344307327410206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022110294667072594, "epoch": 3.704578594342374, "grad_norm": 1.1054219007492065, "learning_rate": 1e-06, "loss": -0.0378, "step": 1586 }, { "clip_ratio/high_max": 0.003332156593387481, "clip_ratio/high_mean": 0.001304566249018535, "clip_ratio/low_mean": 0.0011101543677796144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002414720627712086, "epoch": 3.7069116360454943, "grad_norm": 0.8869757056236267, "learning_rate": 1e-06, "loss": -0.0382, "step": 1587 }, { "clip_ratio/high_max": 0.003334512533911038, "clip_ratio/high_mean": 0.0013068608386674896, "clip_ratio/low_mean": 0.001325136749073863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002631997580465395, "epoch": 3.7092446777486145, "grad_norm": 1.5591036081314087, "learning_rate": 1e-06, "loss": -0.0384, "step": 1588 }, { "clip_ratio/high_max": 0.0026519344173721038, "clip_ratio/high_mean": 0.0011152834540553158, "clip_ratio/low_mean": 0.0010204850495938445, "clip_ratio/low_min": 3.0838815291645005e-05, "clip_ratio/region_mean": 0.0021357685400289483, "completions/clipped_ratio": 0.2042410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 1424.28466796875, "completions/mean_terminated_length": 738.5568237304688, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.711577719451735, "grad_norm": 0.6522813439369202, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 237351627.0, "reward": 0.463169664144516, "reward_std": 0.18329225480556488, "rewards/verify_math_reward/mean": 0.4631696343421936, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1589 }, { "clip_ratio/high_max": 0.003259227298258338, "clip_ratio/high_mean": 0.0012373848767310847, "clip_ratio/low_mean": 0.001066531893229694, "clip_ratio/low_min": 1.1421783710829914e-05, "clip_ratio/region_mean": 0.0023039167645038106, "epoch": 3.713910761154856, "grad_norm": 0.5933325290679932, "learning_rate": 1e-06, "loss": -0.0386, "step": 1590 }, { "clip_ratio/high_max": 0.0030159272864693776, "clip_ratio/high_mean": 0.0012335793799138628, "clip_ratio/low_mean": 0.0012985866742383223, "clip_ratio/low_min": 6.989361645537429e-05, "clip_ratio/region_mean": 0.002532166057790164, "epoch": 3.716243802857976, "grad_norm": 0.449223130941391, "learning_rate": 1e-06, "loss": -0.0391, "step": 1591 }, { "clip_ratio/high_max": 0.0028928256579092704, "clip_ratio/high_mean": 0.0012661958862736356, "clip_ratio/low_mean": 0.0014681385946460068, "clip_ratio/low_min": 8.397932833759114e-05, "clip_ratio/region_mean": 0.002734334491833579, "epoch": 3.7185768445610963, "grad_norm": 0.4793533384799957, "learning_rate": 1e-06, "loss": -0.0391, "step": 1592 }, { "clip_ratio/high_max": 0.0025173574904329143, "clip_ratio/high_mean": 0.0009220475094480207, "clip_ratio/low_mean": 0.0005287979865897796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014508455315080937, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3268.0, "completions/mean_length": 1224.8326416015625, "completions/mean_terminated_length": 624.2509765625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.720909886264217, "grad_norm": 0.5549209117889404, "learning_rate": 1e-06, "loss": -0.1173, "num_tokens": 237894389.0, "reward": 0.574776828289032, "reward_std": 0.1525944322347641, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1593 }, { "clip_ratio/high_max": 0.0028480358414526563, "clip_ratio/high_mean": 0.001049179141773493, "clip_ratio/low_mean": 0.0006699175046378514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017190966682392173, "epoch": 3.7232429279673376, "grad_norm": 0.8929264545440674, "learning_rate": 1e-06, "loss": -0.1176, "step": 1594 }, { "clip_ratio/high_max": 0.0028821287778555416, "clip_ratio/high_mean": 0.0009969511902454542, "clip_ratio/low_mean": 0.0008113463009067345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018082974856952205, "epoch": 3.725575969670458, "grad_norm": 0.3758603632450104, "learning_rate": 1e-06, "loss": -0.1177, "step": 1595 }, { "clip_ratio/high_max": 0.002634988624777179, "clip_ratio/high_mean": 0.0009648987779655727, "clip_ratio/low_mean": 0.0008997826007544063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001864681347797159, "epoch": 3.7279090113735784, "grad_norm": 0.2375277876853943, "learning_rate": 1e-06, "loss": -0.1179, "step": 1596 }, { "clip_ratio/high_max": 0.0028842668180004694, "clip_ratio/high_mean": 0.00123144724057056, "clip_ratio/low_mean": 0.0010287015820722445, "clip_ratio/low_min": 0.00010233869306830456, "clip_ratio/region_mean": 0.002260148845380172, "completions/clipped_ratio": 0.1919642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 1372.5491943359375, "completions/mean_terminated_length": 725.5414428710938, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 3.7302420530766986, "grad_norm": 0.4674237370491028, "learning_rate": 1e-06, "loss": -0.0833, "num_tokens": 238499665.0, "reward": 0.4720982313156128, "reward_std": 0.24754251539707184, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1597 }, { "clip_ratio/high_max": 0.003734769794391468, "clip_ratio/high_mean": 0.0014984920017013792, "clip_ratio/low_mean": 0.001223244922584854, "clip_ratio/low_min": 0.00014248995466914494, "clip_ratio/region_mean": 0.0027217369352001697, "epoch": 3.7325750947798193, "grad_norm": 0.8385607004165649, "learning_rate": 1e-06, "loss": -0.0835, "step": 1598 }, { "clip_ratio/high_max": 0.003370248872670345, "clip_ratio/high_mean": 0.0013734847052546684, "clip_ratio/low_mean": 0.0013988766295369714, "clip_ratio/low_min": 0.0001329838187302812, "clip_ratio/region_mean": 0.0027723612802219577, "epoch": 3.7349081364829395, "grad_norm": 1.93617582321167, "learning_rate": 1e-06, "loss": -0.0834, "step": 1599 }, { "clip_ratio/high_max": 0.0028840551094617695, "clip_ratio/high_mean": 0.0012658819432544988, "clip_ratio/low_mean": 0.0015704615598224336, "clip_ratio/low_min": 0.00013439985559671186, "clip_ratio/region_mean": 0.00283634354127571, "epoch": 3.73724117818606, "grad_norm": 0.7153506278991699, "learning_rate": 1e-06, "loss": -0.0836, "step": 1600 }, { "epoch": 3.73724117818606, "step": 1600, "total_flos": 0.0, "train_loss": 13.440545084960068, "train_runtime": 65092.8727, "train_samples_per_second": 22.024, "train_steps_per_second": 0.025 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 238499665, "num_train_epochs": 4, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }