{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9221902017291066, "eval_steps": 500, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09221902017291066, "grad_norm": 0.4041491448879242, "learning_rate": 5e-07, "logits/chosen": 0.10785800218582153, "logits/rejected": 0.27330636978149414, "logps/chosen": -133.32418823242188, "logps/ref_chosen": -133.65472412109375, "logps/ref_rejected": -180.49884033203125, "logps/rejected": -180.05508422851562, "loss": 0.6937, "rewards/accuracies": 0.421875, "rewards/chosen": 0.0033053820952773094, "rewards/margins": -0.0011322337668389082, "rewards/rejected": 0.004437615629285574, "step": 1 }, { "epoch": 0.1844380403458213, "grad_norm": 0.3549412190914154, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.10197083652019501, "logits/rejected": 0.3037399351596832, "logps/chosen": -131.30230712890625, "logps/ref_chosen": -131.65380859375, "logps/ref_rejected": -172.54620361328125, "logps/rejected": -171.98153686523438, "loss": 0.6942, "rewards/accuracies": 0.3203125, "rewards/chosen": 0.003514800686389208, "rewards/margins": -0.002131802961230278, "rewards/rejected": 0.005646603647619486, "step": 2 }, { "epoch": 0.276657060518732, "grad_norm": 0.3252619802951813, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.12241504341363907, "logits/rejected": 0.294990211725235, "logps/chosen": -136.54458618164062, "logps/ref_chosen": -137.15257263183594, "logps/ref_rejected": -173.96763610839844, "logps/rejected": -173.33465576171875, "loss": 0.6933, "rewards/accuracies": 0.484375, "rewards/chosen": 0.006079699378460646, "rewards/margins": -0.00025035254657268524, "rewards/rejected": 0.0063300528563559055, "step": 3 }, { "epoch": 0.3688760806916426, "grad_norm": 0.26956096291542053, "learning_rate": 3.75e-07, "logits/chosen": 0.16513879597187042, "logits/rejected": 0.3464285731315613, "logps/chosen": -140.32760620117188, "logps/ref_chosen": -140.77301025390625, "logps/ref_rejected": -174.7427520751953, "logps/rejected": -174.2738037109375, "loss": 0.6933, "rewards/accuracies": 0.5078125, "rewards/chosen": 0.004454074427485466, "rewards/margins": -0.00023549507022835314, "rewards/rejected": 0.0046895695850253105, "step": 4 }, { "epoch": 0.4610951008645533, "grad_norm": 0.28848719596862793, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.05396685004234314, "logits/rejected": 0.21335136890411377, "logps/chosen": -133.9427490234375, "logps/ref_chosen": -134.1736297607422, "logps/ref_rejected": -165.17189025878906, "logps/rejected": -164.88853454589844, "loss": 0.6934, "rewards/accuracies": 0.4765625, "rewards/chosen": 0.0023086071014404297, "rewards/margins": -0.000525187817402184, "rewards/rejected": 0.002833794802427292, "step": 5 }, { "epoch": 0.553314121037464, "grad_norm": 0.32075604796409607, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.11954495310783386, "logits/rejected": 0.2969638407230377, "logps/chosen": -134.4439697265625, "logps/ref_chosen": -134.64218139648438, "logps/ref_rejected": -169.63507080078125, "logps/rejected": -169.50543212890625, "loss": 0.6928, "rewards/accuracies": 0.546875, "rewards/chosen": 0.0019819545559585094, "rewards/margins": 0.0006855088286101818, "rewards/rejected": 0.0012964459601789713, "step": 6 }, { "epoch": 0.6455331412103746, "grad_norm": 0.32672885060310364, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.11371711641550064, "logits/rejected": 0.3090907335281372, "logps/chosen": -132.55160522460938, "logps/ref_chosen": -132.65093994140625, "logps/ref_rejected": -172.39491271972656, "logps/rejected": -172.12570190429688, "loss": 0.694, "rewards/accuracies": 0.3828125, "rewards/chosen": 0.0009934443514794111, "rewards/margins": -0.001698581501841545, "rewards/rejected": 0.0026920258533209562, "step": 7 }, { "epoch": 0.7377521613832853, "grad_norm": 0.2813870310783386, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.12331125140190125, "logits/rejected": 0.2776939272880554, "logps/chosen": -130.44442749023438, "logps/ref_chosen": -130.6746063232422, "logps/ref_rejected": -162.9871368408203, "logps/rejected": -162.7511444091797, "loss": 0.6932, "rewards/accuracies": 0.5234375, "rewards/chosen": 0.0023018340580165386, "rewards/margins": -5.816877819597721e-05, "rewards/rejected": 0.0023600030690431595, "step": 8 }, { "epoch": 0.829971181556196, "grad_norm": 0.45836392045021057, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.10148279368877411, "logits/rejected": 0.2862037122249603, "logps/chosen": -135.59454345703125, "logps/ref_chosen": -135.72047424316406, "logps/ref_rejected": -172.78402709960938, "logps/rejected": -172.65322875976562, "loss": 0.6932, "rewards/accuracies": 0.484375, "rewards/chosen": 0.0012592630228027701, "rewards/margins": -4.884544250671752e-05, "rewards/rejected": 0.0013081086799502373, "step": 9 }, { "epoch": 0.9221902017291066, "grad_norm": 0.41180089116096497, "learning_rate": 0.0, "logits/chosen": 0.1567625105381012, "logits/rejected": 0.29784321784973145, "logps/chosen": -138.06668090820312, "logps/ref_chosen": -138.26239013671875, "logps/ref_rejected": -173.29273986816406, "logps/rejected": -173.15267944335938, "loss": 0.6929, "rewards/accuracies": 0.5546875, "rewards/chosen": 0.0019570719450712204, "rewards/margins": 0.0005564212915487587, "rewards/rejected": 0.0014006507117301226, "step": 10 }, { "epoch": 0.9221902017291066, "step": 10, "total_flos": 0.0, "train_loss": 0.6933929324150085, "train_runtime": 650.6499, "train_samples_per_second": 2.132, "train_steps_per_second": 0.015 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }