{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.28704833984375, "eval_logits/rejected": 10.111818313598633, "eval_logps/chosen": -77.38970184326172, "eval_logps/ref_chosen": -106.16099548339844, "eval_logps/ref_rejected": -119.78473663330078, "eval_logps/rejected": -90.60062408447266, "eval_loss": 0.3555455803871155, "eval_rewards/accuracies": 0.4879629611968994, "eval_rewards/chosen": 0.2877129316329956, "eval_rewards/margins": -0.004128198605030775, "eval_rewards/rejected": 0.2918411195278168, "eval_runtime": 199.7246, "eval_samples": 1080, "eval_samples_per_second": 5.407, "eval_sft_loss": 0.2702542543411255, "eval_steps_per_second": 1.352, "total_flos": 0.0, "train_loss": 0.3833660825093587, "train_runtime": 8038.2327, "train_samples": 9715, "train_samples_per_second": 1.209, "train_steps_per_second": 0.009 }