{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.246881484985352, "eval_logits/rejected": 10.068053245544434, "eval_logps/chosen": -74.92684936523438, "eval_logps/ref_chosen": -106.16099548339844, "eval_logps/ref_rejected": -119.78473663330078, "eval_logps/rejected": -87.84820556640625, "eval_loss": 0.26139113306999207, "eval_rewards/accuracies": 0.47962963581085205, "eval_rewards/chosen": 0.3123414218425751, "eval_rewards/margins": -0.007023785263299942, "eval_rewards/rejected": 0.3193652033805847, "eval_runtime": 165.9841, "eval_samples": 1080, "eval_samples_per_second": 6.507, "eval_sft_loss": 0.26139113306999207, "eval_steps_per_second": 1.627 }