{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.247183799743652, "eval_logits/rejected": 10.069817543029785, "eval_logps/chosen": -0.4010205566883087, "eval_logps/rejected": -0.41174402832984924, "eval_loss": 1.3837605714797974, "eval_rewards/accuracies": 0.5231481194496155, "eval_rewards/chosen": -0.8020411133766174, "eval_rewards/margins": 0.02144695073366165, "eval_rewards/rejected": -0.8234880566596985, "eval_runtime": 145.5386, "eval_samples": 1080, "eval_samples_per_second": 7.421, "eval_steps_per_second": 1.855, "total_flos": 0.0, "train_loss": 1.401187040011088, "train_runtime": 7987.3887, "train_samples": 9715, "train_samples_per_second": 1.216, "train_steps_per_second": 0.009 }