{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9893390191897654, "eval_steps": 500, "global_step": 58, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completion_length": 395.3694381713867, "epoch": 0.017057569296375266, "grad_norm": 0.25610095262527466, "kl": 0.0, "learning_rate": 5e-07, "loss": 0.1043, "reward": 1.9107143729925156, "reward_std": 0.3902299851179123, "rewards/accuracy_reward": 0.6462053805589676, "rewards/compress_reward": 0.3247768059372902, "rewards/format_reward": 0.9397321790456772, "step": 1 }, { "clip_ratio": 0.0, "completion_length": 398.57200622558594, "epoch": 0.08528784648187633, "grad_norm": 0.3123304545879364, "kl": 0.00016838312149047852, "learning_rate": 2.5e-06, "loss": 0.1161, "reward": 1.8558873608708382, "reward_std": 0.4281642036512494, "rewards/accuracy_reward": 0.5892857434228063, "rewards/compress_reward": 0.3148716646246612, "rewards/format_reward": 0.9517299514263868, "step": 5 }, { "clip_ratio": 0.0, "completion_length": 367.61095199584963, "epoch": 0.17057569296375266, "grad_norm": 2.265721321105957, "kl": 0.015511131286621094, "learning_rate": 2.956412726139078e-06, "loss": 0.1141, "reward": 1.9002233117818832, "reward_std": 0.40832622945308683, "rewards/accuracy_reward": 0.5988839618861675, "rewards/compress_reward": 0.3475446566939354, "rewards/format_reward": 0.9537946879863739, "step": 10 }, { "clip_ratio": 0.0, "completion_length": 313.22568435668944, "epoch": 0.255863539445629, "grad_norm": 0.40780770778656006, "kl": 0.0651092529296875, "learning_rate": 2.7836719084521715e-06, "loss": 0.0857, "reward": 1.9601563423871995, "reward_std": 0.39528256431221964, "rewards/accuracy_reward": 0.5776785969734192, "rewards/compress_reward": 0.4090401969850063, "rewards/format_reward": 0.9734375372529029, "step": 15 }, { "clip_ratio": 0.0, "completion_length": 286.3823780059814, "epoch": 0.3411513859275053, "grad_norm": 4.390214443206787, "kl": 0.06104736328125, "learning_rate": 2.4946839873611927e-06, "loss": 0.0709, "reward": 2.0002232909202577, "reward_std": 0.36102210320532324, "rewards/accuracy_reward": 0.5935268148779869, "rewards/compress_reward": 0.4294643089175224, "rewards/format_reward": 0.9772321805357933, "step": 20 }, { "clip_ratio": 0.0, "completion_length": 293.0426471710205, "epoch": 0.42643923240938164, "grad_norm": 0.9113842844963074, "kl": 0.069866943359375, "learning_rate": 2.1156192081791355e-06, "loss": 0.0508, "reward": 2.032812610268593, "reward_std": 0.3311908446252346, "rewards/accuracy_reward": 0.5979910969734192, "rewards/compress_reward": 0.4497768051922321, "rewards/format_reward": 0.985044677555561, "step": 25 }, { "clip_ratio": 0.0, "completion_length": 315.613631439209, "epoch": 0.511727078891258, "grad_norm": 1.5531269311904907, "kl": 0.064349365234375, "learning_rate": 1.6808050203829845e-06, "loss": 0.0379, "reward": 2.0685268968343733, "reward_std": 0.2967603411525488, "rewards/accuracy_reward": 0.6350446686148643, "rewards/compress_reward": 0.442857164144516, "rewards/format_reward": 0.990625025331974, "step": 30 }, { "clip_ratio": 0.0, "completion_length": 313.02188873291016, "epoch": 0.5970149253731343, "grad_norm": 0.4903877377510071, "kl": 0.056243896484375, "learning_rate": 1.2296174432791415e-06, "loss": 0.0389, "reward": 2.054687598347664, "reward_std": 0.31466612182557585, "rewards/accuracy_reward": 0.6426339596509933, "rewards/compress_reward": 0.428348234295845, "rewards/format_reward": 0.983705396950245, "step": 35 }, { "clip_ratio": 0.0, "completion_length": 295.896439743042, "epoch": 0.6823027718550106, "grad_norm": 0.32720160484313965, "kl": 0.061297607421875, "learning_rate": 8.029152419343472e-07, "loss": 0.0381, "reward": 2.0698661684989927, "reward_std": 0.29936849512159824, "rewards/accuracy_reward": 0.6511161006987095, "rewards/compress_reward": 0.43348216339945794, "rewards/format_reward": 0.9852678909897804, "step": 40 }, { "clip_ratio": 0.0, "completion_length": 294.11586112976073, "epoch": 0.767590618336887, "grad_norm": 0.4028303623199463, "kl": 0.06295166015625, "learning_rate": 4.3933982822017883e-07, "loss": 0.0326, "reward": 2.073660808801651, "reward_std": 0.3071202915161848, "rewards/accuracy_reward": 0.6421875312924386, "rewards/compress_reward": 0.4457589529454708, "rewards/format_reward": 0.9857143208384513, "step": 45 }, { "clip_ratio": 0.0, "completion_length": 289.0868427276611, "epoch": 0.8528784648187633, "grad_norm": 0.41271042823791504, "kl": 0.06861572265625, "learning_rate": 1.718159615201853e-07, "loss": 0.0452, "reward": 2.044196516275406, "reward_std": 0.32505202740430833, "rewards/accuracy_reward": 0.6229910977184773, "rewards/compress_reward": 0.44084823578596116, "rewards/format_reward": 0.9803571760654449, "step": 50 }, { "clip_ratio": 0.0, "completion_length": 286.33059158325193, "epoch": 0.9381663113006397, "grad_norm": 0.5375810861587524, "kl": 0.07474365234375, "learning_rate": 2.4570139579284723e-08, "loss": 0.0347, "reward": 2.0625000923871992, "reward_std": 0.34456842839717866, "rewards/accuracy_reward": 0.6479911044239998, "rewards/compress_reward": 0.4363839469850063, "rewards/format_reward": 0.9781250387430191, "step": 55 }, { "clip_ratio": 0.0, "completion_length": 274.45362917582196, "epoch": 0.9893390191897654, "kl": 0.074554443359375, "reward": 2.0604539712270102, "reward_std": 0.31418706725041073, "rewards/accuracy_reward": 0.6287202710906664, "rewards/compress_reward": 0.4499628196159999, "rewards/format_reward": 0.98177087555329, "step": 58, "total_flos": 0.0, "train_loss": 0.05939787125279163, "train_runtime": 12451.444, "train_samples_per_second": 0.602, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 58, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }