|
{ |
|
"epoch": 9.955555555555556, |
|
"eval_logits/chosen": -0.21535295248031616, |
|
"eval_logits/rejected": -0.31062132120132446, |
|
"eval_logps/chosen": -7.679609775543213, |
|
"eval_logps/rejected": -31.993576049804688, |
|
"eval_loss": 0.307707816362381, |
|
"eval_rewards/accuracies": 0.8499999642372131, |
|
"eval_rewards/chosen": 1.48140287399292, |
|
"eval_rewards/margins": 2.2414333820343018, |
|
"eval_rewards/rejected": -0.7600305080413818, |
|
"eval_runtime": 12.0737, |
|
"eval_samples_per_second": 8.282, |
|
"eval_steps_per_second": 4.141, |
|
"total_flos": 6.741083695664333e+16, |
|
"train_loss": 0.31093634622437616, |
|
"train_runtime": 2833.3873, |
|
"train_samples_per_second": 3.176, |
|
"train_steps_per_second": 0.198 |
|
} |