{ "epoch": 0.9182209469153515, "eval_logits/chosen": -0.01766319014132023, "eval_logits/rejected": 0.14318932592868805, "eval_logps/chosen": -138.21542358398438, "eval_logps/ref_chosen": -134.78414916992188, "eval_logps/ref_rejected": -171.45664978027344, "eval_logps/rejected": -174.8708953857422, "eval_loss": 0.693236768245697, "eval_rewards/accuracies": 0.47777777910232544, "eval_rewards/chosen": -0.034312743693590164, "eval_rewards/margins": -0.0001707002375042066, "eval_rewards/rejected": -0.03414204344153404, "eval_runtime": 143.4155, "eval_samples": 1080, "eval_samples_per_second": 7.531, "eval_steps_per_second": 1.883, "total_flos": 0.0, "train_loss": 0.6934699952602387, "train_runtime": 619.8127, "train_samples": 1393, "train_samples_per_second": 2.247, "train_steps_per_second": 0.016 }