{"loss": -0.11016345, "grad_norm": 0.13259538, "learning_rate": 1.667e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000242, "completion_length": 10352.97460938, "response_clip_ratio": 0.11328125, "rewards/CosineReward": -0.002658, "rewards/RepetitionPenalty": -3.9e-07, "reward": -0.00265839, "reward_std": 0.06134121, "kl": 0.0, "clip_ratio": 0.0, "epoch": 0.21052632, "global_step/max_steps": "1/60", "percentage": "1.67%", "elapsed_time": "1h 8m 54s", "remaining_time": "2d 19h 45m 49s"} {"loss": -0.11016345, "grad_norm": 0.13200015, "learning_rate": 3.333e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000467, "kl": 0.0, "clip_ratio": 0.0, "epoch": 0.42105263, "global_step/max_steps": "2/60", "percentage": "3.33%", "elapsed_time": "1h 11m 21s", "remaining_time": "1d 10h 29m 18s"} {"loss": -0.06604709, "grad_norm": 0.08990391, "learning_rate": 5e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.00035, "completion_length": 10439.36914062, "response_clip_ratio": 0.13671875, "rewards/CosineReward": 0.00062986, "rewards/RepetitionPenalty": -1.6e-07, "reward": 0.0006297, "reward_std": 0.07172461, "kl": 9.5e-07, "clip_ratio": 1.344e-05, "epoch": 0.63157895, "global_step/max_steps": "3/60", "percentage": "5.00%", "elapsed_time": "2h 22m 48s", "remaining_time": "1d 21h 13m 12s"} {"loss": -0.06727766, "grad_norm": 0.09670949, "learning_rate": 6.667e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000458, "kl": 1.11e-05, "clip_ratio": 1.702e-05, "epoch": 0.84210526, "global_step/max_steps": "4/60", "percentage": "6.67%", "elapsed_time": "2h 25m 27s", "remaining_time": "1d 9h 56m 30s"} {"loss": -0.09315312, "grad_norm": 0.14283726, "learning_rate": 8.333e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000387, "completion_length": 10092.40820312, "response_clip_ratio": 0.11914062, "rewards/CosineReward": -0.00513406, "rewards/RepetitionPenalty": -1.8e-06, "reward": -0.00513586, "reward_std": 0.07994876, "kl": 0.00017762, "clip_ratio": 1.676e-05, "epoch": 1.21052632, "global_step/max_steps": "5/60", "percentage": "8.33%", "elapsed_time": "3h 35m 22s", "remaining_time": "1d 15h 29m 7s"} {"loss": -0.10416982, "grad_norm": 0.18263349, "learning_rate": 0.0001, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000459, "epoch": 1.42105263, "global_step/max_steps": "6/60", "percentage": "10.00%", "elapsed_time": "3h 37m 55s", "remaining_time": "1d 8h 41m 23s"} {"eval_loss": -0.53774166, "eval_completion_length": 12289.0, "eval_response_clip_ratio": 1.0, "eval_rewards/CosineReward": 0.01299669, "eval_rewards/RepetitionPenalty": 0.0, "eval_reward": 0.01299669, "eval_reward_std": 0.08769983, "eval_kl": 0.04833984, "eval_clip_ratio": 4.069e-05, "eval_runtime": 1030.1127, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "epoch": 1.42105263, "global_step/max_steps": "6/60", "percentage": "10.00%", "elapsed_time": "3h 55m 6s", "remaining_time": "1d 11h 15m 54s"} {"loss": -0.05137517, "grad_norm": 0.12912713, "learning_rate": 9.992e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000382, "kl": 0.01740646, "clip_ratio": 0.00052376, "completion_length": 10448.94921875, "response_clip_ratio": 0.1484375, "rewards/CosineReward": 0.00490983, "rewards/RepetitionPenalty": -2.1e-07, "reward": 0.00490962, "reward_std": 0.08167182, "epoch": 1.63157895, "global_step/max_steps": "7/60", "percentage": "11.67%", "elapsed_time": "5h 5m 17s", "remaining_time": "1d 14h 31m 31s"} {"loss": -0.05105743, "grad_norm": 0.26641014, "learning_rate": 9.966e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000433, "kl": 0.08959961, "clip_ratio": 0.17064847, "epoch": 1.84210526, "global_step/max_steps": "8/60", "percentage": "13.33%", "elapsed_time": "5h 7m 58s", "remaining_time": "1d 9h 21m 52s"} {"loss": -0.05842069, "grad_norm": 0.10375156, "learning_rate": 9.924e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000396, "completion_length": 10432.38476562, "response_clip_ratio": 0.25585938, "rewards/CosineReward": 0.03643618, "rewards/RepetitionPenalty": -8e-08, "reward": 0.0364361, "reward_std": 0.11898956, "kl": 0.09631348, "clip_ratio": 9.48e-06, "epoch": 2.21052632, "global_step/max_steps": "9/60", "percentage": "15.00%", "elapsed_time": "6h 19m 3s", "remaining_time": "1d 11h 47m 59s"} {"loss": -0.06491819, "grad_norm": 0.09477334, "learning_rate": 9.865e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000436, "kl": 0.11853027, "clip_ratio": 0.00360884, "epoch": 2.42105263, "global_step/max_steps": "10/60", "percentage": "16.67%", "elapsed_time": "6h 21m 47s", "remaining_time": "1d 7h 48m 57s"} {"loss": -0.04600232, "grad_norm": 0.0673914, "learning_rate": 9.79e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000405, "completion_length": 10559.296875, "response_clip_ratio": 0.36132812, "rewards/CosineReward": 0.02320497, "rewards/RepetitionPenalty": -3.3e-07, "reward": 0.02320464, "reward_std": 0.10593635, "kl": 0.12756348, "clip_ratio": 1.296e-05, "epoch": 2.63157895, "global_step/max_steps": "11/60", "percentage": "18.33%", "elapsed_time": "7h 32m 52s", "remaining_time": "1d 9h 37m 21s"} {"loss": -0.05069057, "grad_norm": 0.05781339, "learning_rate": 9.698e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000439, "epoch": 2.84210526, "global_step/max_steps": "12/60", "percentage": "20.00%", "elapsed_time": "7h 35m 40s", "remaining_time": "1d 6h 22m 42s"} {"eval_loss": 0.17524278, "eval_completion_length": 12289.0, "eval_response_clip_ratio": 1.0, "eval_rewards/CosineReward": 0.03234308, "eval_rewards/RepetitionPenalty": 0.0, "eval_reward": 0.03234308, "eval_reward_std": 0.10685289, "eval_kl": 0.22753906, "eval_clip_ratio": 4.392e-05, "eval_runtime": 1025.9041, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "epoch": 2.84210526, "global_step/max_steps": "12/60", "percentage": "20.00%", "elapsed_time": "7h 52m 46s", "remaining_time": "1d 7h 31m 5s"} {"loss": -0.02191038, "grad_norm": 0.01199417, "learning_rate": 9.591e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000399, "kl": 0.15112305, "clip_ratio": 0.00079083, "completion_length": 10652.93945312, "response_clip_ratio": 0.41992188, "rewards/CosineReward": 0.03598418, "rewards/RepetitionPenalty": -4.2e-07, "reward": 0.03598376, "reward_std": 0.1155337, "epoch": 3.21052632, "global_step/max_steps": "13/60", "percentage": "21.67%", "elapsed_time": "9h 3m 21s", "remaining_time": "1d 8h 44m 28s"} {"loss": -0.02267258, "grad_norm": 0.01075426, "learning_rate": 9.468e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000427, "kl": 0.16918945, "clip_ratio": 0.00048213, "epoch": 3.42105263, "global_step/max_steps": "14/60", "percentage": "23.33%", "elapsed_time": "9h 5m 55s", "remaining_time": "1d 5h 53m 44s"} {"loss": -0.05979916, "grad_norm": 0.01779361, "learning_rate": 9.33e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000406, "completion_length": 10482.14648438, "response_clip_ratio": 0.4765625, "rewards/CosineReward": 0.03584346, "rewards/RepetitionPenalty": -1.5e-07, "reward": 0.03584332, "reward_std": 0.11829412, "kl": 0.16674805, "clip_ratio": 1.962e-05, "epoch": 3.63157895, "global_step/max_steps": "15/60", "percentage": "25.00%", "elapsed_time": "10h 15m 57s", "remaining_time": "1d 6h 47m 53s"} {"loss": -0.06071458, "grad_norm": 0.01321639, "learning_rate": 9.177e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000431, "kl": 0.16748047, "clip_ratio": 0.00011349, "epoch": 3.84210526, "global_step/max_steps": "16/60", "percentage": "26.67%", "elapsed_time": "10h 18m 45s", "remaining_time": "1d 4h 21m 34s"} {"loss": -0.04504441, "grad_norm": 0.00835275, "learning_rate": 9.011e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.00041, "completion_length": 10822.3515625, "response_clip_ratio": 0.5625, "rewards/CosineReward": 0.02731912, "rewards/RepetitionPenalty": -2.3e-07, "reward": 0.02731888, "reward_std": 0.10441224, "kl": 0.17871094, "clip_ratio": 2.486e-05, "epoch": 4.21052632, "global_step/max_steps": "17/60", "percentage": "28.33%", "elapsed_time": "11h 31m 31s", "remaining_time": "1d 5h 9m 9s"} {"loss": -0.04548755, "grad_norm": 0.0059984, "learning_rate": 8.83e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000432, "epoch": 4.42105263, "global_step/max_steps": "18/60", "percentage": "30.00%", "elapsed_time": "11h 34m 19s", "remaining_time": "1d 3h 0m 6s"} {"eval_loss": -0.3821989, "eval_completion_length": 12289.0, "eval_response_clip_ratio": 1.0, "eval_rewards/CosineReward": 0.03729328, "eval_rewards/RepetitionPenalty": 0.0, "eval_reward": 0.03729328, "eval_reward_std": 0.10691347, "eval_kl": 0.18359375, "eval_clip_ratio": 2.287e-05, "eval_runtime": 1041.231, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "epoch": 4.42105263, "global_step/max_steps": "18/60", "percentage": "30.00%", "elapsed_time": "11h 51m 41s", "remaining_time": "1d 3h 40m 35s"} {"loss": -0.03466903, "grad_norm": 0.00707562, "learning_rate": 8.637e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000404, "kl": 0.18200684, "clip_ratio": 6.176e-05, "completion_length": 10454.50390625, "response_clip_ratio": 0.46679688, "rewards/CosineReward": 0.04070046, "rewards/RepetitionPenalty": -1.29e-06, "reward": 0.04069917, "reward_std": 0.11991006, "epoch": 4.63157895, "global_step/max_steps": "19/60", "percentage": "31.67%", "elapsed_time": "13h 2m 57s", "remaining_time": "1d 4h 9m 31s"} {"loss": -0.03502114, "grad_norm": 0.00589657, "learning_rate": 8.431e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000424, "kl": 0.19287109, "clip_ratio": 6.069e-05, "epoch": 4.84210526, "global_step/max_steps": "20/60", "percentage": "33.33%", "elapsed_time": "13h 5m 40s", "remaining_time": "1d 2h 11m 20s"} {"loss": -0.00866277, "grad_norm": 0.00415454, "learning_rate": 8.214e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000408, "completion_length": 10645.05664062, "response_clip_ratio": 0.5625, "rewards/CosineReward": 0.04996993, "rewards/RepetitionPenalty": -1.2e-07, "reward": 0.04996981, "reward_std": 0.1384942, "kl": 0.17626953, "clip_ratio": 3.873e-05, "epoch": 5.21052632, "global_step/max_steps": "21/60", "percentage": "35.00%", "elapsed_time": "14h 18m 46s", "remaining_time": "1d 2h 34m 52s"} {"loss": -0.00874364, "grad_norm": 0.00430067, "learning_rate": 7.986e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000426, "kl": 0.17895508, "clip_ratio": 5.869e-05, "epoch": 5.42105263, "global_step/max_steps": "22/60", "percentage": "36.67%", "elapsed_time": "14h 21m 16s", "remaining_time": "1d 0h 47m 38s"} {"loss": -0.03423421, "grad_norm": 0.01327698, "learning_rate": 7.748e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.00041, "completion_length": 10538.07226562, "response_clip_ratio": 0.58398438, "rewards/CosineReward": 0.03447545, "rewards/RepetitionPenalty": -6.61e-06, "reward": 0.03446883, "reward_std": 0.11841745, "kl": 0.1796875, "clip_ratio": 4.635e-05, "epoch": 5.63157895, "global_step/max_steps": "23/60", "percentage": "38.33%", "elapsed_time": "15h 34m 17s", "remaining_time": "1d 1h 2m 58s"} {"loss": -0.03426633, "grad_norm": 0.01413172, "learning_rate": 7.5e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000427, "epoch": 5.84210526, "global_step/max_steps": "24/60", "percentage": "40.00%", "elapsed_time": "15h 37m 2s", "remaining_time": "23h 25m 34s"} {"eval_loss": 0.36124694, "eval_completion_length": 12289.0, "eval_response_clip_ratio": 1.0, "eval_rewards/CosineReward": 0.04339283, "eval_rewards/RepetitionPenalty": 0.0, "eval_reward": 0.04339283, "eval_reward_std": 0.10456254, "eval_kl": 0.19824219, "eval_clip_ratio": 4.069e-05, "eval_runtime": 1045.0632, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "epoch": 5.84210526, "global_step/max_steps": "24/60", "percentage": "40.00%", "elapsed_time": "15h 54m 27s", "remaining_time": "23h 51m 41s"} {"loss": -0.02097315, "grad_norm": 0.00993353, "learning_rate": 7.244e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000406, "kl": 0.18005371, "clip_ratio": 5.057e-05, "completion_length": 10789.25976562, "response_clip_ratio": 0.6171875, "rewards/CosineReward": 0.03010232, "rewards/RepetitionPenalty": -2.6e-07, "reward": 0.03010206, "reward_std": 0.10742512, "epoch": 6.21052632, "global_step/max_steps": "25/60", "percentage": "41.67%", "elapsed_time": "17h 6m 5s", "remaining_time": "23h 56m 31s"} {"loss": -0.02103914, "grad_norm": 0.00989576, "learning_rate": 6.98e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000421, "kl": 0.18408203, "clip_ratio": 4.822e-05, "epoch": 6.42105263, "global_step/max_steps": "26/60", "percentage": "43.33%", "elapsed_time": "17h 8m 51s", "remaining_time": "22h 25m 25s"} {"loss": -0.03593946, "grad_norm": 0.00436775, "learning_rate": 6.71e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000408, "completion_length": 10197.09960938, "response_clip_ratio": 0.51367188, "rewards/CosineReward": 0.0475284, "rewards/RepetitionPenalty": -7.9e-07, "reward": 0.0475276, "reward_std": 0.14935148, "kl": 0.17456055, "clip_ratio": 5.443e-05, "epoch": 6.63157895, "global_step/max_steps": "27/60", "percentage": "45.00%", "elapsed_time": "18h 22m 20s", "remaining_time": "22h 27m 18s"} {"loss": -0.03595501, "grad_norm": 0.00527766, "learning_rate": 6.434e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000422, "kl": 0.18237305, "clip_ratio": 5.954e-05, "epoch": 6.84210526, "global_step/max_steps": "28/60", "percentage": "46.67%", "elapsed_time": "18h 25m 42s", "remaining_time": "21h 3m 40s"} {"loss": -0.03189056, "grad_norm": 0.0168444, "learning_rate": 6.153e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.00041, "completion_length": 10427.296875, "response_clip_ratio": 0.54101562, "rewards/CosineReward": 0.04964366, "rewards/RepetitionPenalty": -7.5e-07, "reward": 0.04964291, "reward_std": 0.13294973, "kl": 0.18701172, "clip_ratio": 5.28e-05, "epoch": 7.21052632, "global_step/max_steps": "29/60", "percentage": "48.33%", "elapsed_time": "19h 37m 59s", "remaining_time": "20h 59m 14s"} {"loss": -0.03209799, "grad_norm": 0.01443596, "learning_rate": 5.868e-05, "memory(GiB)": 182.91, "train_speed(iter/s)": 0.000423, "epoch": 7.42105263, "global_step/max_steps": "30/60", "percentage": "50.00%", "elapsed_time": "19h 40m 53s", "remaining_time": "19h 40m 53s"} {"eval_loss": -0.09817081, "eval_completion_length": 12289.0, "eval_response_clip_ratio": 1.0, "eval_rewards/CosineReward": 0.05227777, "eval_rewards/RepetitionPenalty": 0.0, "eval_reward": 0.05227778, "eval_reward_std": 0.14013015, "eval_kl": 0.19726562, "eval_clip_ratio": 4.542e-05, "eval_runtime": 1085.6092, "eval_samples_per_second": 0.001, "eval_steps_per_second": 0.001, "epoch": 7.42105263, "global_step/max_steps": "30/60", "percentage": "50.00%", "elapsed_time": "19h 58m 59s", "remaining_time": "19h 58m 59s"}