{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9221902017291066, "eval_steps": 500, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09221902017291066, "grad_norm": 0.49082234501838684, "learning_rate": 5e-07, "logits/chosen": 0.10258625447750092, "logits/rejected": 0.2278534471988678, "logps/chosen": -134.6281280517578, "logps/ref_chosen": -124.05789947509766, "logps/ref_rejected": -154.88946533203125, "logps/rejected": -165.65171813964844, "loss": 0.5071, "rewards/accuracies": 0.609375, "rewards/chosen": -0.1057022362947464, "rewards/margins": 0.0019202901748940349, "rewards/rejected": -0.10762252658605576, "sft_loss": 0.47003409266471863, "step": 1 }, { "epoch": 0.1844380403458213, "grad_norm": 0.457568883895874, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.06952403485774994, "logits/rejected": 0.3078790605068207, "logps/chosen": -133.73080444335938, "logps/ref_chosen": -123.33631896972656, "logps/ref_rejected": -168.01480102539062, "logps/rejected": -178.5548553466797, "loss": 0.5045, "rewards/accuracies": 0.6015625, "rewards/chosen": -0.10394492000341415, "rewards/margins": 0.001455550198443234, "rewards/rejected": -0.10540048032999039, "sft_loss": 0.4669448435306549, "step": 2 }, { "epoch": 0.276657060518732, "grad_norm": 0.3315995931625366, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.09175828099250793, "logits/rejected": 0.22343572974205017, "logps/chosen": -133.59866333007812, "logps/ref_chosen": -124.51382446289062, "logps/ref_rejected": -155.95262145996094, "logps/rejected": -165.19468688964844, "loss": 0.5209, "rewards/accuracies": 0.6328125, "rewards/chosen": -0.0908483937382698, "rewards/margins": 0.0015722049865871668, "rewards/rejected": -0.09242061525583267, "sft_loss": 0.48662421107292175, "step": 3 }, { "epoch": 0.3688760806916426, "grad_norm": 0.45795556902885437, "learning_rate": 3.75e-07, "logits/chosen": 0.0821409523487091, "logits/rejected": 0.24085786938667297, "logps/chosen": -132.02798461914062, "logps/ref_chosen": -123.37024688720703, "logps/ref_rejected": -160.60987854003906, "logps/rejected": -169.43934631347656, "loss": 0.4951, "rewards/accuracies": 0.59375, "rewards/chosen": -0.08657727390527725, "rewards/margins": 0.0017174197128042579, "rewards/rejected": -0.08829469233751297, "sft_loss": 0.45568379759788513, "step": 4 }, { "epoch": 0.4610951008645533, "grad_norm": 0.5167257785797119, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.1414952129125595, "logits/rejected": 0.23190416395664215, "logps/chosen": -129.588134765625, "logps/ref_chosen": -122.73413848876953, "logps/ref_rejected": -143.36245727539062, "logps/rejected": -150.25531005859375, "loss": 0.4931, "rewards/accuracies": 0.484375, "rewards/chosen": -0.06853996217250824, "rewards/margins": 0.00038862242945469916, "rewards/rejected": -0.06892858445644379, "sft_loss": 0.4531819820404053, "step": 5 }, { "epoch": 0.553314121037464, "grad_norm": 0.7184420824050903, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.14788731932640076, "logits/rejected": 0.29836705327033997, "logps/chosen": -130.35125732421875, "logps/ref_chosen": -124.05830383300781, "logps/ref_rejected": -150.84971618652344, "logps/rejected": -157.2368621826172, "loss": 0.4945, "rewards/accuracies": 0.5859375, "rewards/chosen": -0.0629296600818634, "rewards/margins": 0.0009420262649655342, "rewards/rejected": -0.06387168914079666, "sft_loss": 0.4549233317375183, "step": 6 }, { "epoch": 0.6455331412103746, "grad_norm": 0.5709189772605896, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.16531895101070404, "logits/rejected": 0.24730782210826874, "logps/chosen": -128.30152893066406, "logps/ref_chosen": -126.53475189208984, "logps/ref_rejected": -153.44937133789062, "logps/rejected": -155.2527313232422, "loss": 0.4814, "rewards/accuracies": 0.5546875, "rewards/chosen": -0.017667775973677635, "rewards/margins": 0.0003658741479739547, "rewards/rejected": -0.018033649772405624, "sft_loss": 0.4390857517719269, "step": 7 }, { "epoch": 0.7377521613832853, "grad_norm": 0.3785640299320221, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.05871865525841713, "logits/rejected": 0.2406335175037384, "logps/chosen": -120.95695495605469, "logps/ref_chosen": -120.56550598144531, "logps/ref_rejected": -159.84185791015625, "logps/rejected": -160.2302703857422, "loss": 0.4663, "rewards/accuracies": 0.484375, "rewards/chosen": -0.003914527129381895, "rewards/margins": -3.0469876946881413e-05, "rewards/rejected": -0.0038840575143694878, "sft_loss": 0.42091870307922363, "step": 8 }, { "epoch": 0.829971181556196, "grad_norm": 0.39315882325172424, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.14951348304748535, "logits/rejected": 0.21872764825820923, "logps/chosen": -124.28387451171875, "logps/ref_chosen": -124.57730102539062, "logps/ref_rejected": -146.5771484375, "logps/rejected": -146.2667236328125, "loss": 0.4617, "rewards/accuracies": 0.4453125, "rewards/chosen": 0.002934188349172473, "rewards/margins": -0.00017014730838127434, "rewards/rejected": 0.003104335628449917, "sft_loss": 0.41538387537002563, "step": 9 }, { "epoch": 0.9221902017291066, "grad_norm": 0.4166741371154785, "learning_rate": 0.0, "logits/chosen": 0.12583141028881073, "logits/rejected": 0.25348690152168274, "logps/chosen": -124.92916107177734, "logps/ref_chosen": -125.44955444335938, "logps/ref_rejected": -160.47593688964844, "logps/rejected": -159.9407958984375, "loss": 0.466, "rewards/accuracies": 0.5390625, "rewards/chosen": 0.0052038198336958885, "rewards/margins": -0.00014770496636629105, "rewards/rejected": 0.0053515248000621796, "sft_loss": 0.4205903112888336, "step": 10 }, { "epoch": 0.9221902017291066, "step": 10, "total_flos": 0.0, "train_loss": 0.48907283544540403, "train_runtime": 630.6427, "train_samples_per_second": 2.199, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }