{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.238293647766113, "eval_logits/rejected": 10.049151420593262, "eval_logps/chosen": -114.94315338134766, "eval_logps/ref_chosen": -106.16099548339844, "eval_logps/ref_rejected": -119.78473663330078, "eval_logps/rejected": -129.38494873046875, "eval_loss": 0.6894497275352478, "eval_rewards/accuracies": 0.5833333134651184, "eval_rewards/chosen": -0.08782169967889786, "eval_rewards/margins": 0.008180392906069756, "eval_rewards/rejected": -0.09600208699703217, "eval_runtime": 152.328, "eval_samples": 1080, "eval_samples_per_second": 7.09, "eval_steps_per_second": 1.772, "total_flos": 0.0, "train_loss": 0.6920125889778137, "train_runtime": 8059.7946, "train_samples": 9715, "train_samples_per_second": 1.205, "train_steps_per_second": 0.009 }