{ "epoch": 0.9990987511265611, "eval_log_ratio_diff/mean": 0.1875, "eval_logits/chosen": -2.515625, "eval_logits/rejected": -2.546875, "eval_logps/chosen": -1.203125, "eval_logps/rejected": -1.6015625, "eval_loss": 6.4530487060546875, "eval_nll_loss": 1.203125, "eval_ref_probs/chosen": 0.39426562190055847, "eval_ref_probs/rejected": 0.3336612582206726, "eval_rejected_term/max": -0.052632447332143784, "eval_rejected_term/min": -0.052632447332143784, "eval_rewards/accuracies": 0.7239999771118164, "eval_rewards/chosen": -0.01519775390625, "eval_rewards/margins": 0.018798828125, "eval_rewards/rejected": -0.033935546875, "eval_runtime": 25.6341, "eval_samples_per_second": 39.01, "eval_steps_per_second": 9.753 }