{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.04302170127630234, "eval_logits/rejected": 0.1195269301533699, "eval_logps/chosen": -138.40455627441406, "eval_logps/ref_chosen": -138.20062255859375, "eval_logps/ref_rejected": -174.97462463378906, "eval_logps/rejected": -175.22572326660156, "eval_loss": 0.6929154992103577, "eval_rewards/accuracies": 0.5379629731178284, "eval_rewards/chosen": -0.0020393093582242727, "eval_rewards/margins": 0.00047187993186526, "eval_rewards/rejected": -0.002511189319193363, "eval_runtime": 144.5655, "eval_samples": 1080, "eval_samples_per_second": 7.471, "eval_steps_per_second": 1.868 }