{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.35390567779541, "eval_logits/rejected": 10.18101692199707, "eval_logps/chosen": -0.37907084822654724, "eval_logps/rejected": -0.3889653980731964, "eval_loss": 1.382422924041748, "eval_rewards/accuracies": 0.5287036895751953, "eval_rewards/chosen": -0.7581416964530945, "eval_rewards/margins": 0.019789105281233788, "eval_rewards/rejected": -0.7779307961463928, "eval_runtime": 152.7821, "eval_samples": 1080, "eval_samples_per_second": 7.069, "eval_steps_per_second": 1.767 }