{ "epoch": 0.9221902017291066, "eval_logits/chosen": -0.05567166954278946, "eval_logits/rejected": 0.10712358355522156, "eval_logps/chosen": -128.39443969726562, "eval_logps/ref_chosen": -127.12059020996094, "eval_logps/ref_rejected": -163.7115478515625, "eval_logps/rejected": -165.0626220703125, "eval_loss": 0.6066033840179443, "eval_rewards/accuracies": 0.5694444179534912, "eval_rewards/chosen": -0.012738817371428013, "eval_rewards/margins": 0.0007717205444350839, "eval_rewards/rejected": -0.013510540127754211, "eval_runtime": 145.1462, "eval_samples": 1080, "eval_samples_per_second": 7.441, "eval_sft_loss": 0.4342755675315857, "eval_steps_per_second": 1.86, "total_flos": 0.0, "train_loss": 0.6159235239028931, "train_runtime": 636.1291, "train_samples": 1387, "train_samples_per_second": 2.18, "train_steps_per_second": 0.016 }