math_ultrachatmistral5 / eval_results.json
oabi's picture
End of training
67d3b4b verified
{
"epoch": 1.0,
"eval_logits/chosen": -3.0115926265716553,
"eval_logits/rejected": -3.022681474685669,
"eval_logps/chosen": -266.7742004394531,
"eval_logps/rejected": -253.80645751953125,
"eval_loss": 0.6836252808570862,
"eval_rewards/accuracies": 0.5225093364715576,
"eval_rewards/chosen": -0.020729802548885345,
"eval_rewards/margins": 0.020151691511273384,
"eval_rewards/rejected": -0.040862053632736206,
"eval_runtime": 165.8484,
"eval_samples": 2000,
"eval_samples_per_second": 12.21,
"eval_steps_per_second": 0.386
}