{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9221902017291066, "eval_steps": 500, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09221902017291066, "grad_norm": 0.3625384569168091, "learning_rate": 5e-07, "logits/chosen": 0.12041429430246353, "logits/rejected": 0.2929443418979645, "logps/chosen": -134.8220672607422, "logps/ref_chosen": -135.22447204589844, "logps/ref_rejected": -175.8645782470703, "logps/rejected": -175.39471435546875, "loss": 0.6935, "rewards/accuracies": 0.4609375, "rewards/chosen": 0.004024160094559193, "rewards/margins": -0.000674244191031903, "rewards/rejected": 0.0046984050422906876, "step": 1 }, { "epoch": 0.1844380403458213, "grad_norm": 0.43253499269485474, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.18826183676719666, "logits/rejected": 0.31612664461135864, "logps/chosen": -134.79527282714844, "logps/ref_chosen": -135.17901611328125, "logps/ref_rejected": -167.55538940429688, "logps/rejected": -167.03347778320312, "loss": 0.6938, "rewards/accuracies": 0.3671875, "rewards/chosen": 0.0038375072181224823, "rewards/margins": -0.0013817482395097613, "rewards/rejected": 0.0052192555740475655, "step": 2 }, { "epoch": 0.276657060518732, "grad_norm": 0.4659630358219147, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.08171764016151428, "logits/rejected": 0.19102837145328522, "logps/chosen": -130.4796600341797, "logps/ref_chosen": -130.83258056640625, "logps/ref_rejected": -162.6024627685547, "logps/rejected": -162.10250854492188, "loss": 0.6939, "rewards/accuracies": 0.390625, "rewards/chosen": 0.00352896424010396, "rewards/margins": -0.001470521092414856, "rewards/rejected": 0.004999485332518816, "step": 3 }, { "epoch": 0.3688760806916426, "grad_norm": 0.29478588700294495, "learning_rate": 3.75e-07, "logits/chosen": 0.08501344174146652, "logits/rejected": 0.2843908369541168, "logps/chosen": -131.01138305664062, "logps/ref_chosen": -131.26690673828125, "logps/ref_rejected": -169.983154296875, "logps/rejected": -169.63084411621094, "loss": 0.6936, "rewards/accuracies": 0.4453125, "rewards/chosen": 0.002555293031036854, "rewards/margins": -0.0009679353097453713, "rewards/rejected": 0.0035232282243669033, "step": 4 }, { "epoch": 0.4610951008645533, "grad_norm": 0.38472220301628113, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.17568331956863403, "logits/rejected": 0.3496350049972534, "logps/chosen": -132.62481689453125, "logps/ref_chosen": -132.73873901367188, "logps/ref_rejected": -169.95010375976562, "logps/rejected": -169.81109619140625, "loss": 0.6933, "rewards/accuracies": 0.453125, "rewards/chosen": 0.0011391888838261366, "rewards/margins": -0.00025093427393585443, "rewards/rejected": 0.0013901233905926347, "step": 5 }, { "epoch": 0.553314121037464, "grad_norm": 0.3507808446884155, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.09320589154958725, "logits/rejected": 0.29253625869750977, "logps/chosen": -133.10009765625, "logps/ref_chosen": -133.45578002929688, "logps/ref_rejected": -173.9835205078125, "logps/rejected": -173.6336212158203, "loss": 0.6931, "rewards/accuracies": 0.5078125, "rewards/chosen": 0.003556814743205905, "rewards/margins": 5.76310558244586e-05, "rewards/rejected": 0.003499183803796768, "step": 6 }, { "epoch": 0.6455331412103746, "grad_norm": 0.2825755178928375, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.16687946021556854, "logits/rejected": 0.29598817229270935, "logps/chosen": -139.96351623535156, "logps/ref_chosen": -140.12921142578125, "logps/ref_rejected": -171.02523803710938, "logps/rejected": -170.8452911376953, "loss": 0.6932, "rewards/accuracies": 0.4921875, "rewards/chosen": 0.001656890381127596, "rewards/margins": -0.00014275184366852045, "rewards/rejected": 0.0017996423412114382, "step": 7 }, { "epoch": 0.7377521613832853, "grad_norm": 0.3293827474117279, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.12655451893806458, "logits/rejected": 0.3025429844856262, "logps/chosen": -131.79420471191406, "logps/ref_chosen": -131.82289123535156, "logps/ref_rejected": -170.08551025390625, "logps/rejected": -170.10638427734375, "loss": 0.6929, "rewards/accuracies": 0.5234375, "rewards/chosen": 0.00028696414665319026, "rewards/margins": 0.0004956752527505159, "rewards/rejected": -0.00020871106244158, "step": 8 }, { "epoch": 0.829971181556196, "grad_norm": 0.36158400774002075, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.09881015121936798, "logits/rejected": 0.2709089517593384, "logps/chosen": -133.0918426513672, "logps/ref_chosen": -133.32278442382812, "logps/ref_rejected": -168.0214080810547, "logps/rejected": -167.9148406982422, "loss": 0.6925, "rewards/accuracies": 0.5859375, "rewards/chosen": 0.002309603150933981, "rewards/margins": 0.0012440752470865846, "rewards/rejected": 0.0010655277874320745, "step": 9 }, { "epoch": 0.9221902017291066, "grad_norm": 0.3515172600746155, "learning_rate": 0.0, "logits/chosen": 0.17717128992080688, "logits/rejected": 0.3308084011077881, "logps/chosen": -132.75823974609375, "logps/ref_chosen": -132.620849609375, "logps/ref_rejected": -157.32113647460938, "logps/rejected": -157.39456176757812, "loss": 0.6935, "rewards/accuracies": 0.3984375, "rewards/chosen": -0.0013737636618316174, "rewards/margins": -0.0006395292584784329, "rewards/rejected": -0.0007342344033531845, "step": 10 }, { "epoch": 0.9221902017291066, "step": 10, "total_flos": 0.0, "train_loss": 0.6933379590511322, "train_runtime": 621.366, "train_samples_per_second": 2.232, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }