{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.0485045425593853, "eval_logits/rejected": 0.11842113733291626, "eval_logps/chosen": -140.11708068847656, "eval_logps/ref_chosen": -139.74691772460938, "eval_logps/ref_rejected": -176.36155700683594, "eval_logps/rejected": -176.86927795410156, "eval_loss": 0.6924660801887512, "eval_rewards/accuracies": 0.5833333134651184, "eval_rewards/chosen": -0.0037015508860349655, "eval_rewards/margins": 0.0013754719402641058, "eval_rewards/rejected": -0.005077023059129715, "eval_runtime": 143.2062, "eval_samples": 1080, "eval_samples_per_second": 7.542, "eval_steps_per_second": 1.885 }