{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.010529891587793827, "eval_logits/rejected": 0.1464616358280182, "eval_logps/chosen": -128.34133911132812, "eval_logps/ref_chosen": -139.74691772460938, "eval_logps/ref_rejected": -176.36155700683594, "eval_logps/rejected": -164.8606719970703, "eval_loss": 0.4340026378631592, "eval_rewards/accuracies": 0.43888887763023376, "eval_rewards/chosen": 0.1140558049082756, "eval_rewards/margins": -0.0009530979441478848, "eval_rewards/rejected": 0.11500890552997589, "eval_runtime": 144.7424, "eval_samples": 1080, "eval_samples_per_second": 7.462, "eval_sft_loss": 0.43400266766548157, "eval_steps_per_second": 1.865 }