{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.019660519436001778, "eval_logits/rejected": 0.1411634236574173, "eval_logps/chosen": -139.75112915039062, "eval_logps/ref_chosen": -140.15744018554688, "eval_logps/ref_rejected": -176.90542602539062, "eval_logps/rejected": -176.54791259765625, "eval_loss": 0.6929076910018921, "eval_rewards/accuracies": 0.5249999761581421, "eval_rewards/chosen": 0.004063152242451906, "eval_rewards/margins": 0.0004878933250438422, "eval_rewards/rejected": 0.003575259121134877, "eval_runtime": 143.2325, "eval_samples": 1080, "eval_samples_per_second": 7.54, "eval_steps_per_second": 1.885 }