{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.238580703735352, "eval_logits/rejected": 10.054126739501953, "eval_logps/chosen": -0.3649774193763733, "eval_logps/rejected": -0.3733733296394348, "eval_loss": 1.383278250694275, "eval_rewards/accuracies": 0.5259259343147278, "eval_rewards/chosen": -0.7299548387527466, "eval_rewards/margins": 0.016791854053735733, "eval_rewards/rejected": -0.7467466592788696, "eval_runtime": 163.2381, "eval_samples": 1080, "eval_samples_per_second": 6.616, "eval_steps_per_second": 1.654 }