{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.010169930756092072, "eval_logits/rejected": 0.1450614333152771, "eval_logps/chosen": -138.02931213378906, "eval_logps/ref_chosen": -137.93557739257812, "eval_logps/ref_rejected": -174.6787567138672, "eval_logps/rejected": -174.78001403808594, "eval_loss": 0.6931137442588806, "eval_rewards/accuracies": 0.5064814686775208, "eval_rewards/chosen": -0.0009374108631163836, "eval_rewards/margins": 7.527563138864934e-05, "eval_rewards/rejected": -0.0010126865236088634, "eval_runtime": 146.6534, "eval_samples": 1080, "eval_samples_per_second": 7.364, "eval_steps_per_second": 1.841 }