{ "epoch": 1.0, "eval_logits/chosen": -3.4721531867980957, "eval_logits/rejected": -3.3256866931915283, "eval_logps/chosen": -254.42779541015625, "eval_logps/rejected": -714.5497436523438, "eval_loss": 0.00404787715524435, "eval_rewards/accuracies": 0.9983165264129639, "eval_rewards/chosen": 0.13777638971805573, "eval_rewards/margins": 29.16945457458496, "eval_rewards/rejected": -29.03167724609375, "eval_runtime": 460.1921, "eval_samples": 9500, "eval_samples_per_second": 20.644, "eval_steps_per_second": 0.645, "train_loss": 0.0552501158515984, "train_runtime": 31216.715, "train_samples": 188284, "train_samples_per_second": 6.032, "train_steps_per_second": 0.094 }