{ "epoch": 0.9990987511265611, "eval_log_ratio_diff/mean": 0.64453125, "eval_logits/chosen": -2.734375, "eval_logits/rejected": -2.671875, "eval_logps/chosen": -2.0625, "eval_logps/rejected": -2.921875, "eval_loss": 21.276519775390625, "eval_nll_loss": 2.0625, "eval_ref_probs/chosen": 0.39426562190055847, "eval_ref_probs/rejected": 0.3336612582206726, "eval_rejected_term/max": -0.09452494978904724, "eval_rejected_term/min": -0.09452494978904724, "eval_rewards/accuracies": 0.7360000014305115, "eval_rewards/chosen": -0.1015625, "eval_rewards/margins": 0.064453125, "eval_rewards/rejected": -0.166015625, "eval_runtime": 25.3403, "eval_samples_per_second": 39.463, "eval_steps_per_second": 9.866 }