{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15808974886784685, "eval_steps": 500, "global_step": 12, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013174145738987238, "grad_norm": 1.0125823020935059, "learning_rate": 6.25e-08, "logits/chosen": 9.990612030029297, "logits/rejected": 10.698101997375488, "logps/chosen": -102.88545989990234, "logps/ref_chosen": -102.88545989990234, "logps/ref_rejected": -121.84871673583984, "logps/rejected": -121.84871673583984, "loss": 0.4327, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "sft_loss": 0.36753880977630615, "step": 1 }, { "epoch": 0.026348291477974475, "grad_norm": 0.3579196035861969, "learning_rate": 1.25e-07, "logits/chosen": 10.211905479431152, "logits/rejected": 11.06594467163086, "logps/chosen": -107.70349884033203, "logps/ref_chosen": -107.70349884033203, "logps/ref_rejected": -121.89966583251953, "logps/rejected": -121.89966583251953, "loss": 0.4667, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "sft_loss": 0.41013145446777344, "step": 2 }, { "epoch": 0.03952243721696171, "grad_norm": 0.49040451645851135, "learning_rate": 1.875e-07, "logits/chosen": 10.035531044006348, "logits/rejected": 11.027185440063477, "logps/chosen": -108.23310852050781, "logps/ref_chosen": -107.98188781738281, "logps/ref_rejected": -124.51527404785156, "logps/rejected": -124.64785766601562, "loss": 0.4683, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0025122263468801975, "rewards/margins": -0.0011863748077303171, "rewards/rejected": -0.0013258515391498804, "sft_loss": 0.41194257140159607, "step": 3 }, { "epoch": 0.05269658295594895, "grad_norm": 0.8740162253379822, "learning_rate": 2.5e-07, "logits/chosen": 9.860024452209473, "logits/rejected": 10.876106262207031, "logps/chosen": -109.94369506835938, "logps/ref_chosen": -109.20836639404297, "logps/ref_rejected": -119.23908996582031, "logps/rejected": -119.73454284667969, "loss": 0.4633, "rewards/accuracies": 0.3828125, "rewards/chosen": -0.007353362161666155, "rewards/margins": -0.002398767275735736, "rewards/rejected": -0.004954595118761063, "sft_loss": 0.40552011132240295, "step": 4 }, { "epoch": 0.06587072869493618, "grad_norm": 1.1980141401290894, "learning_rate": 3.1249999999999997e-07, "logits/chosen": 10.19467830657959, "logits/rejected": 10.95050048828125, "logps/chosen": -104.02793884277344, "logps/ref_chosen": -103.87680053710938, "logps/ref_rejected": -118.41618347167969, "logps/rejected": -118.46170806884766, "loss": 0.4351, "rewards/accuracies": 0.5078125, "rewards/chosen": -0.001511452835984528, "rewards/margins": -0.001056289067491889, "rewards/rejected": -0.0004551640013232827, "sft_loss": 0.3704559803009033, "step": 5 }, { "epoch": 0.07904487443392343, "grad_norm": 0.928102433681488, "learning_rate": 3.75e-07, "logits/chosen": 10.701957702636719, "logits/rejected": 11.477033615112305, "logps/chosen": -107.61714935302734, "logps/ref_chosen": -107.58968353271484, "logps/ref_rejected": -122.07303619384766, "logps/rejected": -122.0443115234375, "loss": 0.4515, "rewards/accuracies": 0.4765625, "rewards/chosen": -0.0002746534300968051, "rewards/margins": -0.0005618570139631629, "rewards/rejected": 0.0002872035256586969, "sft_loss": 0.3909577429294586, "step": 6 }, { "epoch": 0.09221902017291066, "grad_norm": 1.0250380039215088, "learning_rate": 4.375e-07, "logits/chosen": 10.025421142578125, "logits/rejected": 10.72871208190918, "logps/chosen": -107.13175201416016, "logps/ref_chosen": -107.42727661132812, "logps/ref_rejected": -116.87063598632812, "logps/rejected": -116.28421020507812, "loss": 0.4392, "rewards/accuracies": 0.3984375, "rewards/chosen": 0.002955180360004306, "rewards/margins": -0.0029091311153024435, "rewards/rejected": 0.005864311475306749, "sft_loss": 0.3753029406070709, "step": 7 }, { "epoch": 0.1053931659118979, "grad_norm": 0.5661666393280029, "learning_rate": 5e-07, "logits/chosen": 10.203546524047852, "logits/rejected": 11.103278160095215, "logps/chosen": -104.93194580078125, "logps/ref_chosen": -105.60282135009766, "logps/ref_rejected": -119.53916931152344, "logps/rejected": -118.93331909179688, "loss": 0.4416, "rewards/accuracies": 0.515625, "rewards/chosen": 0.006708861328661442, "rewards/margins": 0.0006504050688818097, "rewards/rejected": 0.006058456376194954, "sft_loss": 0.3787955939769745, "step": 8 }, { "epoch": 0.11856731165088513, "grad_norm": 0.820360541343689, "learning_rate": 4.997252228714278e-07, "logits/chosen": 10.184520721435547, "logits/rejected": 11.154094696044922, "logps/chosen": -104.26238250732422, "logps/ref_chosen": -105.46086120605469, "logps/ref_rejected": -119.00373840332031, "logps/rejected": -117.88744354248047, "loss": 0.4437, "rewards/accuracies": 0.53125, "rewards/chosen": 0.011985024437308311, "rewards/margins": 0.0008220230811275542, "rewards/rejected": 0.011163001880049706, "sft_loss": 0.38146448135375977, "step": 9 }, { "epoch": 0.13174145738987236, "grad_norm": 0.4781506061553955, "learning_rate": 4.989014955054745e-07, "logits/chosen": 10.042634963989258, "logits/rejected": 10.866905212402344, "logps/chosen": -101.11405944824219, "logps/ref_chosen": -104.21009826660156, "logps/ref_rejected": -118.9209213256836, "logps/rejected": -115.99314880371094, "loss": 0.4088, "rewards/accuracies": 0.46875, "rewards/chosen": 0.030960241332650185, "rewards/margins": 0.0016824830090627074, "rewards/rejected": 0.029277760535478592, "sft_loss": 0.3378788232803345, "step": 10 }, { "epoch": 0.14491560312885962, "grad_norm": 0.8178320527076721, "learning_rate": 4.975306286336627e-07, "logits/chosen": 9.987105369567871, "logits/rejected": 11.181533813476562, "logps/chosen": -101.77717590332031, "logps/ref_chosen": -105.94319152832031, "logps/ref_rejected": -122.76007843017578, "logps/rejected": -119.00365447998047, "loss": 0.4478, "rewards/accuracies": 0.578125, "rewards/chosen": 0.04166024178266525, "rewards/margins": 0.0040960111655294895, "rewards/rejected": 0.0375642292201519, "sft_loss": 0.3869646191596985, "step": 11 }, { "epoch": 0.15808974886784685, "grad_norm": 0.7931886315345764, "learning_rate": 4.956156357188939e-07, "logits/chosen": 9.913724899291992, "logits/rejected": 10.605714797973633, "logps/chosen": -104.08253479003906, "logps/ref_chosen": -109.08442687988281, "logps/ref_rejected": -121.41947174072266, "logps/rejected": -116.61964416503906, "loss": 0.4233, "rewards/accuracies": 0.4765625, "rewards/chosen": 0.05001899227499962, "rewards/margins": 0.002020882908254862, "rewards/rejected": 0.04799811542034149, "sft_loss": 0.3560585379600525, "step": 12 } ], "logging_steps": 1, "max_steps": 75, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 12, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }