{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.025785785168409348, "eval_logits/rejected": 0.1337740421295166, "eval_logps/chosen": -129.20240783691406, "eval_logps/ref_chosen": -127.95065307617188, "eval_logps/ref_rejected": -164.48851013183594, "eval_logps/rejected": -165.79054260253906, "eval_loss": 0.5649449825286865, "eval_rewards/accuracies": 0.520370364189148, "eval_rewards/chosen": -0.012517303228378296, "eval_rewards/margins": 0.0005030279280617833, "eval_rewards/rejected": -0.013020330108702183, "eval_runtime": 144.4308, "eval_samples": 1080, "eval_samples_per_second": 7.478, "eval_sft_loss": 0.4369891583919525, "eval_steps_per_second": 1.869, "total_flos": 0.0, "train_loss": 0.5759013295173645, "train_runtime": 619.8336, "train_samples": 1387, "train_samples_per_second": 2.238, "train_steps_per_second": 0.016 }