{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9182209469153515, "eval_steps": 500, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09182209469153516, "grad_norm": 0.48370492458343506, "learning_rate": 5e-07, "logits/chosen": 0.16483092308044434, "logits/rejected": 0.3757104277610779, "logps/chosen": -129.87265014648438, "logps/ref_chosen": -130.11143493652344, "logps/ref_rejected": -170.61505126953125, "logps/rejected": -170.22250366210938, "loss": 0.6939, "rewards/accuracies": 0.3359375, "rewards/chosen": 0.0023880640510469675, "rewards/margins": -0.0015374040231108665, "rewards/rejected": 0.003925468306988478, "step": 1 }, { "epoch": 0.1836441893830703, "grad_norm": 0.2921633720397949, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.21723544597625732, "logits/rejected": 0.33153384923934937, "logps/chosen": -136.4618377685547, "logps/ref_chosen": -136.66879272460938, "logps/ref_rejected": -170.20431518554688, "logps/rejected": -169.85755920410156, "loss": 0.6939, "rewards/accuracies": 0.390625, "rewards/chosen": 0.0020694590639322996, "rewards/margins": -0.0013981539523229003, "rewards/rejected": 0.003467612899839878, "step": 2 }, { "epoch": 0.27546628407460544, "grad_norm": 0.3820483684539795, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.278810977935791, "logits/rejected": 0.3778008222579956, "logps/chosen": -140.81326293945312, "logps/ref_chosen": -141.2205047607422, "logps/ref_rejected": -163.39132690429688, "logps/rejected": -162.9018096923828, "loss": 0.6936, "rewards/accuracies": 0.453125, "rewards/chosen": 0.004072446841746569, "rewards/margins": -0.000822849222458899, "rewards/rejected": 0.004895296413451433, "step": 3 }, { "epoch": 0.3672883787661406, "grad_norm": 0.34646955132484436, "learning_rate": 3.75e-07, "logits/chosen": 0.14010563492774963, "logits/rejected": 0.28288155794143677, "logps/chosen": -140.85638427734375, "logps/ref_chosen": -141.0355987548828, "logps/ref_rejected": -166.15609741210938, "logps/rejected": -165.95912170410156, "loss": 0.6932, "rewards/accuracies": 0.5, "rewards/chosen": 0.0017920829122886062, "rewards/margins": -0.00017783755902200937, "rewards/rejected": 0.001969920238479972, "step": 4 }, { "epoch": 0.45911047345767575, "grad_norm": 0.3133969306945801, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.20951783657073975, "logits/rejected": 0.33099910616874695, "logps/chosen": -136.3818817138672, "logps/ref_chosen": -136.40235900878906, "logps/ref_rejected": -158.99447631835938, "logps/rejected": -158.85215759277344, "loss": 0.6938, "rewards/accuracies": 0.359375, "rewards/chosen": 0.00020450774172786623, "rewards/margins": -0.0012185449013486505, "rewards/rejected": 0.0014230526285246015, "step": 5 }, { "epoch": 0.5509325681492109, "grad_norm": 0.413125604391098, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.1270689070224762, "logits/rejected": 0.35084813833236694, "logps/chosen": -134.93136596679688, "logps/ref_chosen": -134.92478942871094, "logps/ref_rejected": -171.18968200683594, "logps/rejected": -171.1276092529297, "loss": 0.6935, "rewards/accuracies": 0.4765625, "rewards/chosen": -6.584226503036916e-05, "rewards/margins": -0.0006865662289783359, "rewards/rejected": 0.0006207239348441362, "step": 6 }, { "epoch": 0.642754662840746, "grad_norm": 0.3633699417114258, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.160421222448349, "logits/rejected": 0.31845855712890625, "logps/chosen": -131.51528930664062, "logps/ref_chosen": -131.45591735839844, "logps/ref_rejected": -161.3601531982422, "logps/rejected": -161.34597778320312, "loss": 0.6935, "rewards/accuracies": 0.390625, "rewards/chosen": -0.0005936581292189658, "rewards/margins": -0.0007353991386480629, "rewards/rejected": 0.00014174105308484286, "step": 7 }, { "epoch": 0.7345767575322812, "grad_norm": 0.31316861510276794, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.20960983633995056, "logits/rejected": 0.41680708527565, "logps/chosen": -136.77139282226562, "logps/ref_chosen": -136.7902069091797, "logps/ref_rejected": -181.39559936523438, "logps/rejected": -181.3594512939453, "loss": 0.6932, "rewards/accuracies": 0.4609375, "rewards/chosen": 0.00018815101066138595, "rewards/margins": -0.0001733488024910912, "rewards/rejected": 0.0003614998422563076, "step": 8 }, { "epoch": 0.8263988522238164, "grad_norm": 0.33593055605888367, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.21831227838993073, "logits/rejected": 0.32834041118621826, "logps/chosen": -137.8075408935547, "logps/ref_chosen": -137.73573303222656, "logps/ref_rejected": -171.2981719970703, "logps/rejected": -171.36036682128906, "loss": 0.6932, "rewards/accuracies": 0.484375, "rewards/chosen": -0.0007180434186011553, "rewards/margins": -9.611125278752297e-05, "rewards/rejected": -0.0006219320930540562, "step": 9 }, { "epoch": 0.9182209469153515, "grad_norm": 0.3731705844402313, "learning_rate": 0.0, "logits/chosen": 0.15047258138656616, "logits/rejected": 0.27864834666252136, "logps/chosen": -146.82630920410156, "logps/ref_chosen": -146.76913452148438, "logps/ref_rejected": -181.7820281982422, "logps/rejected": -181.79539489746094, "loss": 0.6934, "rewards/accuracies": 0.5, "rewards/chosen": -0.000571864191442728, "rewards/margins": -0.0004380465252324939, "rewards/rejected": -0.000133817782625556, "step": 10 }, { "epoch": 0.9182209469153515, "step": 10, "total_flos": 0.0, "train_loss": 0.6935156106948852, "train_runtime": 611.1413, "train_samples_per_second": 2.279, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }