chchen's picture
End of training
a862bf0 verified
{
"epoch": 9.955555555555556,
"eval_logits/chosen": -0.21535295248031616,
"eval_logits/rejected": -0.31062132120132446,
"eval_logps/chosen": -7.679609775543213,
"eval_logps/rejected": -31.993576049804688,
"eval_loss": 0.307707816362381,
"eval_rewards/accuracies": 0.8499999642372131,
"eval_rewards/chosen": 1.48140287399292,
"eval_rewards/margins": 2.2414333820343018,
"eval_rewards/rejected": -0.7600305080413818,
"eval_runtime": 12.0737,
"eval_samples_per_second": 8.282,
"eval_steps_per_second": 4.141,
"total_flos": 6.741083695664333e+16,
"train_loss": 0.31093634622437616,
"train_runtime": 2833.3873,
"train_samples_per_second": 3.176,
"train_steps_per_second": 0.198
}