{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.0485045425593853, "eval_logits/rejected": 0.11842113733291626, "eval_logps/chosen": -140.11708068847656, "eval_logps/ref_chosen": -139.74691772460938, "eval_logps/ref_rejected": -176.36155700683594, "eval_logps/rejected": -176.86927795410156, "eval_loss": 0.6924660801887512, "eval_rewards/accuracies": 0.5833333134651184, "eval_rewards/chosen": -0.0037015508860349655, "eval_rewards/margins": 0.0013754719402641058, "eval_rewards/rejected": -0.005077023059129715, "eval_runtime": 143.2062, "eval_samples": 1080, "eval_samples_per_second": 7.542, "eval_steps_per_second": 1.885, "total_flos": 0.0, "train_loss": 0.692774099111557, "train_runtime": 614.9448, "train_samples": 1387, "train_samples_per_second": 2.255, "train_steps_per_second": 0.016 }