{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.158658981323242, "eval_logits/rejected": 9.958081245422363, "eval_logps/chosen": -110.41038513183594, "eval_logps/ref_chosen": -106.16099548339844, "eval_logps/ref_rejected": -119.78473663330078, "eval_logps/rejected": -124.09846496582031, "eval_loss": 0.6929683089256287, "eval_rewards/accuracies": 0.4972222149372101, "eval_rewards/chosen": -0.04249410331249237, "eval_rewards/margins": 0.0006433044327422976, "eval_rewards/rejected": -0.043137405067682266, "eval_runtime": 159.1867, "eval_samples": 1080, "eval_samples_per_second": 6.784, "eval_steps_per_second": 1.696 }