{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9221902017291066, "eval_steps": 500, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09221902017291066, "grad_norm": 0.48872771859169006, "learning_rate": 5e-07, "logits/chosen": 0.12278681248426437, "logits/rejected": 0.26080453395843506, "logps/chosen": -132.40695190429688, "logps/ref_chosen": -121.57450103759766, "logps/ref_rejected": -166.17074584960938, "logps/rejected": -177.05752563476562, "loss": 0.5381, "rewards/accuracies": 0.578125, "rewards/chosen": -0.10832449048757553, "rewards/margins": 0.000543450063560158, "rewards/rejected": -0.10886794328689575, "sft_loss": 0.4607769250869751, "step": 1 }, { "epoch": 0.1844380403458213, "grad_norm": 0.7124770283699036, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.1629990041255951, "logits/rejected": 0.3135248124599457, "logps/chosen": -134.13861083984375, "logps/ref_chosen": -123.470703125, "logps/ref_rejected": -159.41018676757812, "logps/rejected": -170.13693237304688, "loss": 0.5337, "rewards/accuracies": 0.546875, "rewards/chosen": -0.10667898505926132, "rewards/margins": 0.000588476425036788, "rewards/rejected": -0.10726746171712875, "sft_loss": 0.4541751444339752, "step": 2 }, { "epoch": 0.276657060518732, "grad_norm": 0.5838692784309387, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.09741576015949249, "logits/rejected": 0.2005864381790161, "logps/chosen": -134.69981384277344, "logps/ref_chosen": -124.5863037109375, "logps/ref_rejected": -152.92868041992188, "logps/rejected": -163.17599487304688, "loss": 0.5444, "rewards/accuracies": 0.5390625, "rewards/chosen": -0.10113508999347687, "rewards/margins": 0.0013378520961850882, "rewards/rejected": -0.1024729460477829, "sft_loss": 0.4703802764415741, "step": 3 }, { "epoch": 0.3688760806916426, "grad_norm": 0.5232725739479065, "learning_rate": 3.75e-07, "logits/chosen": 0.11469627916812897, "logits/rejected": 0.34779858589172363, "logps/chosen": -131.96142578125, "logps/ref_chosen": -123.3237533569336, "logps/ref_rejected": -160.29180908203125, "logps/rejected": -169.1221466064453, "loss": 0.5541, "rewards/accuracies": 0.6015625, "rewards/chosen": -0.08637689054012299, "rewards/margins": 0.00192636763677001, "rewards/rejected": -0.08830326050519943, "sft_loss": 0.48508843779563904, "step": 4 }, { "epoch": 0.4610951008645533, "grad_norm": 0.4373687505722046, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.13797219097614288, "logits/rejected": 0.2943338453769684, "logps/chosen": -129.5037841796875, "logps/ref_chosen": -122.18938446044922, "logps/ref_rejected": -166.5259246826172, "logps/rejected": -173.7913055419922, "loss": 0.5453, "rewards/accuracies": 0.4921875, "rewards/chosen": -0.07314412295818329, "rewards/margins": -0.0004904267261736095, "rewards/rejected": -0.07265370339155197, "sft_loss": 0.4711988866329193, "step": 5 }, { "epoch": 0.553314121037464, "grad_norm": 0.41881853342056274, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.1323821097612381, "logits/rejected": 0.31871122121810913, "logps/chosen": -132.2285614013672, "logps/ref_chosen": -125.81028747558594, "logps/ref_rejected": -156.31320190429688, "logps/rejected": -162.79730224609375, "loss": 0.5281, "rewards/accuracies": 0.53125, "rewards/chosen": -0.06418267637491226, "rewards/margins": 0.0006581920897588134, "rewards/rejected": -0.06484086811542511, "sft_loss": 0.44570034742355347, "step": 6 }, { "epoch": 0.6455331412103746, "grad_norm": 0.3571808934211731, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.18207934498786926, "logits/rejected": 0.28170838952064514, "logps/chosen": -130.76219177246094, "logps/ref_chosen": -128.7080535888672, "logps/ref_rejected": -158.98272705078125, "logps/rejected": -161.1389923095703, "loss": 0.5175, "rewards/accuracies": 0.53125, "rewards/chosen": -0.02054133079946041, "rewards/margins": 0.0010213707573711872, "rewards/rejected": -0.02156270295381546, "sft_loss": 0.4298672676086426, "step": 7 }, { "epoch": 0.7377521613832853, "grad_norm": 0.4075394570827484, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.10367591679096222, "logits/rejected": 0.28194165229797363, "logps/chosen": -124.07286834716797, "logps/ref_chosen": -123.2734375, "logps/ref_rejected": -156.3369903564453, "logps/rejected": -157.23104858398438, "loss": 0.5234, "rewards/accuracies": 0.6015625, "rewards/chosen": -0.007994325831532478, "rewards/margins": 0.0009461576119065285, "rewards/rejected": -0.008940483443439007, "sft_loss": 0.4388274848461151, "step": 8 }, { "epoch": 0.829971181556196, "grad_norm": 0.230843648314476, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.06345280259847641, "logits/rejected": 0.206377774477005, "logps/chosen": -127.7571792602539, "logps/ref_chosen": -127.38499450683594, "logps/ref_rejected": -157.14312744140625, "logps/rejected": -157.56546020507812, "loss": 0.5129, "rewards/accuracies": 0.5859375, "rewards/chosen": -0.00372200645506382, "rewards/margins": 0.0005012964247725904, "rewards/rejected": -0.004223302938044071, "sft_loss": 0.42283880710601807, "step": 9 }, { "epoch": 0.9221902017291066, "grad_norm": 0.43585899472236633, "learning_rate": 0.0, "logits/chosen": 0.08928333222866058, "logits/rejected": 0.238207146525383, "logps/chosen": -124.22206115722656, "logps/ref_chosen": -123.99081420898438, "logps/ref_rejected": -154.0594940185547, "logps/rejected": -154.32521057128906, "loss": 0.5223, "rewards/accuracies": 0.5234375, "rewards/chosen": -0.0023124003782868385, "rewards/margins": 0.00034469965612515807, "rewards/rejected": -0.002657099859789014, "sft_loss": 0.4369175434112549, "step": 10 }, { "epoch": 0.9221902017291066, "step": 10, "total_flos": 0.0, "train_loss": 0.5319805860519409, "train_runtime": 668.7287, "train_samples_per_second": 2.074, "train_steps_per_second": 0.015 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }