|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9893390191897654, |
|
"eval_steps": 500, |
|
"global_step": 58, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 395.3694381713867, |
|
"epoch": 0.017057569296375266, |
|
"grad_norm": 0.25610095262527466, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.1043, |
|
"reward": 1.9107143729925156, |
|
"reward_std": 0.3902299851179123, |
|
"rewards/accuracy_reward": 0.6462053805589676, |
|
"rewards/compress_reward": 0.3247768059372902, |
|
"rewards/format_reward": 0.9397321790456772, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 398.57200622558594, |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 0.3123304545879364, |
|
"kl": 0.00016838312149047852, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1161, |
|
"reward": 1.8558873608708382, |
|
"reward_std": 0.4281642036512494, |
|
"rewards/accuracy_reward": 0.5892857434228063, |
|
"rewards/compress_reward": 0.3148716646246612, |
|
"rewards/format_reward": 0.9517299514263868, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 367.61095199584963, |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 2.265721321105957, |
|
"kl": 0.015511131286621094, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.1141, |
|
"reward": 1.9002233117818832, |
|
"reward_std": 0.40832622945308683, |
|
"rewards/accuracy_reward": 0.5988839618861675, |
|
"rewards/compress_reward": 0.3475446566939354, |
|
"rewards/format_reward": 0.9537946879863739, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 313.22568435668944, |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 0.40780770778656006, |
|
"kl": 0.0651092529296875, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.0857, |
|
"reward": 1.9601563423871995, |
|
"reward_std": 0.39528256431221964, |
|
"rewards/accuracy_reward": 0.5776785969734192, |
|
"rewards/compress_reward": 0.4090401969850063, |
|
"rewards/format_reward": 0.9734375372529029, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 286.3823780059814, |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 4.390214443206787, |
|
"kl": 0.06104736328125, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.0709, |
|
"reward": 2.0002232909202577, |
|
"reward_std": 0.36102210320532324, |
|
"rewards/accuracy_reward": 0.5935268148779869, |
|
"rewards/compress_reward": 0.4294643089175224, |
|
"rewards/format_reward": 0.9772321805357933, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 293.0426471710205, |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 0.9113842844963074, |
|
"kl": 0.069866943359375, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 0.0508, |
|
"reward": 2.032812610268593, |
|
"reward_std": 0.3311908446252346, |
|
"rewards/accuracy_reward": 0.5979910969734192, |
|
"rewards/compress_reward": 0.4497768051922321, |
|
"rewards/format_reward": 0.985044677555561, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 315.613631439209, |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 1.5531269311904907, |
|
"kl": 0.064349365234375, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": 0.0379, |
|
"reward": 2.0685268968343733, |
|
"reward_std": 0.2967603411525488, |
|
"rewards/accuracy_reward": 0.6350446686148643, |
|
"rewards/compress_reward": 0.442857164144516, |
|
"rewards/format_reward": 0.990625025331974, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 313.02188873291016, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.4903877377510071, |
|
"kl": 0.056243896484375, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": 0.0389, |
|
"reward": 2.054687598347664, |
|
"reward_std": 0.31466612182557585, |
|
"rewards/accuracy_reward": 0.6426339596509933, |
|
"rewards/compress_reward": 0.428348234295845, |
|
"rewards/format_reward": 0.983705396950245, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 295.896439743042, |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 0.32720160484313965, |
|
"kl": 0.061297607421875, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": 0.0381, |
|
"reward": 2.0698661684989927, |
|
"reward_std": 0.29936849512159824, |
|
"rewards/accuracy_reward": 0.6511161006987095, |
|
"rewards/compress_reward": 0.43348216339945794, |
|
"rewards/format_reward": 0.9852678909897804, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 294.11586112976073, |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 0.4028303623199463, |
|
"kl": 0.06295166015625, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": 0.0326, |
|
"reward": 2.073660808801651, |
|
"reward_std": 0.3071202915161848, |
|
"rewards/accuracy_reward": 0.6421875312924386, |
|
"rewards/compress_reward": 0.4457589529454708, |
|
"rewards/format_reward": 0.9857143208384513, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 289.0868427276611, |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 0.41271042823791504, |
|
"kl": 0.06861572265625, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": 0.0452, |
|
"reward": 2.044196516275406, |
|
"reward_std": 0.32505202740430833, |
|
"rewards/accuracy_reward": 0.6229910977184773, |
|
"rewards/compress_reward": 0.44084823578596116, |
|
"rewards/format_reward": 0.9803571760654449, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 286.33059158325193, |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 0.5375810861587524, |
|
"kl": 0.07474365234375, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": 0.0347, |
|
"reward": 2.0625000923871992, |
|
"reward_std": 0.34456842839717866, |
|
"rewards/accuracy_reward": 0.6479911044239998, |
|
"rewards/compress_reward": 0.4363839469850063, |
|
"rewards/format_reward": 0.9781250387430191, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 274.45362917582196, |
|
"epoch": 0.9893390191897654, |
|
"kl": 0.074554443359375, |
|
"reward": 2.0604539712270102, |
|
"reward_std": 0.31418706725041073, |
|
"rewards/accuracy_reward": 0.6287202710906664, |
|
"rewards/compress_reward": 0.4499628196159999, |
|
"rewards/format_reward": 0.98177087555329, |
|
"step": 58, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05939787125279163, |
|
"train_runtime": 12451.444, |
|
"train_samples_per_second": 0.602, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 58, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|