{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.137195587158203, "eval_logits/rejected": 9.94820499420166, "eval_logps/chosen": -0.38139861822128296, "eval_logps/rejected": -0.3884890377521515, "eval_loss": 1.3865872621536255, "eval_rewards/accuracies": 0.519444465637207, "eval_rewards/chosen": -0.7627972364425659, "eval_rewards/margins": 0.014180805534124374, "eval_rewards/rejected": -0.776978075504303, "eval_runtime": 165.9053, "eval_samples": 1080, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.627 }