{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.03889964893460274, "eval_logits/rejected": 0.12152793258428574, "eval_logps/chosen": -125.9035415649414, "eval_logps/ref_chosen": -125.96058654785156, "eval_logps/ref_rejected": -162.4300994873047, "eval_logps/rejected": -162.40286254882812, "eval_loss": 0.5148054361343384, "eval_rewards/accuracies": 0.5277777910232544, "eval_rewards/chosen": 0.0005705787916667759, "eval_rewards/margins": 0.00029826530953869224, "eval_rewards/rejected": 0.0002723135403357446, "eval_runtime": 145.6449, "eval_samples": 1080, "eval_samples_per_second": 7.415, "eval_sft_loss": 0.42570608854293823, "eval_steps_per_second": 1.854, "total_flos": 0.0, "train_loss": 0.5319805860519409, "train_runtime": 668.7287, "train_samples": 1387, "train_samples_per_second": 2.074, "train_steps_per_second": 0.015 }