{ "epoch": 9.977728285077951, "eval_logits/chosen": -0.13963328301906586, "eval_logits/rejected": -0.2079545557498932, "eval_logps/chosen": -13.248819351196289, "eval_logps/rejected": -29.226905822753906, "eval_loss": 0.4674791693687439, "eval_rewards/accuracies": 0.7899999618530273, "eval_rewards/chosen": 0.9902573823928833, "eval_rewards/margins": 1.390006184577942, "eval_rewards/rejected": -0.3997488021850586, "eval_runtime": 9.6246, "eval_samples_per_second": 10.39, "eval_steps_per_second": 5.195 }