{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9221902017291066, "eval_steps": 500, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09221902017291066, "grad_norm": 0.3019522428512573, "learning_rate": 5e-07, "logits/chosen": 0.06190446391701698, "logits/rejected": 0.24964340031147003, "logps/chosen": -133.9302215576172, "logps/ref_chosen": -134.30517578125, "logps/ref_rejected": -173.56590270996094, "logps/rejected": -173.0438232421875, "loss": 0.6939, "rewards/accuracies": 0.4453125, "rewards/chosen": 0.0037494890857487917, "rewards/margins": -0.0014713926939293742, "rewards/rejected": 0.005220881663262844, "step": 1 }, { "epoch": 0.1844380403458213, "grad_norm": 0.41573917865753174, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.1268736720085144, "logits/rejected": 0.2718392312526703, "logps/chosen": -138.44927978515625, "logps/ref_chosen": -138.6737518310547, "logps/ref_rejected": -170.81944274902344, "logps/rejected": -170.447265625, "loss": 0.6939, "rewards/accuracies": 0.34375, "rewards/chosen": 0.0022446608636528254, "rewards/margins": -0.0014773242874071002, "rewards/rejected": 0.0037219852674752474, "step": 2 }, { "epoch": 0.276657060518732, "grad_norm": 0.3277066946029663, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.1311892718076706, "logits/rejected": 0.26672443747520447, "logps/chosen": -132.68569946289062, "logps/ref_chosen": -132.93014526367188, "logps/ref_rejected": -165.84507751464844, "logps/rejected": -165.56008911132812, "loss": 0.6934, "rewards/accuracies": 0.4765625, "rewards/chosen": 0.0024443636648356915, "rewards/margins": -0.00040555946179665625, "rewards/rejected": 0.0028499234467744827, "step": 3 }, { "epoch": 0.3688760806916426, "grad_norm": 0.41292324662208557, "learning_rate": 3.75e-07, "logits/chosen": 0.08853106200695038, "logits/rejected": 0.23616701364517212, "logps/chosen": -137.2488250732422, "logps/ref_chosen": -137.4427032470703, "logps/ref_rejected": -177.97886657714844, "logps/rejected": -177.62860107421875, "loss": 0.6939, "rewards/accuracies": 0.421875, "rewards/chosen": 0.001938714412972331, "rewards/margins": -0.0015639358898624778, "rewards/rejected": 0.003502650186419487, "step": 4 }, { "epoch": 0.4610951008645533, "grad_norm": 0.31257063150405884, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.12685821950435638, "logits/rejected": 0.23144984245300293, "logps/chosen": -131.0224609375, "logps/ref_chosen": -131.1569061279297, "logps/ref_rejected": -164.11549377441406, "logps/rejected": -163.87142944335938, "loss": 0.6937, "rewards/accuracies": 0.4453125, "rewards/chosen": 0.0013444966170936823, "rewards/margins": -0.001095889019779861, "rewards/rejected": 0.0024403855204582214, "step": 5 }, { "epoch": 0.553314121037464, "grad_norm": 0.36982661485671997, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.16632890701293945, "logits/rejected": 0.27399736642837524, "logps/chosen": -130.6382598876953, "logps/ref_chosen": -130.83815002441406, "logps/ref_rejected": -160.47244262695312, "logps/rejected": -160.15945434570312, "loss": 0.6937, "rewards/accuracies": 0.3671875, "rewards/chosen": 0.001998710911720991, "rewards/margins": -0.0011310731060802937, "rewards/rejected": 0.003129784483462572, "step": 6 }, { "epoch": 0.6455331412103746, "grad_norm": 0.27344921231269836, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.11610936373472214, "logits/rejected": 0.24762782454490662, "logps/chosen": -131.18833923339844, "logps/ref_chosen": -131.2764892578125, "logps/ref_rejected": -175.32669067382812, "logps/rejected": -175.1273193359375, "loss": 0.6937, "rewards/accuracies": 0.4296875, "rewards/chosen": 0.0008815132896415889, "rewards/margins": -0.0011121523566544056, "rewards/rejected": 0.0019936657045036554, "step": 7 }, { "epoch": 0.7377521613832853, "grad_norm": 0.3876575529575348, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.0759858638048172, "logits/rejected": 0.2628093659877777, "logps/chosen": -131.0194549560547, "logps/ref_chosen": -131.0524139404297, "logps/ref_rejected": -162.96224975585938, "logps/rejected": -162.89967346191406, "loss": 0.6933, "rewards/accuracies": 0.484375, "rewards/chosen": 0.00032957797520793974, "rewards/margins": -0.00029635371174663305, "rewards/rejected": 0.0006259315996430814, "step": 8 }, { "epoch": 0.829971181556196, "grad_norm": 0.3749904930591583, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.12389053404331207, "logits/rejected": 0.27916383743286133, "logps/chosen": -136.4560546875, "logps/ref_chosen": -136.45892333984375, "logps/ref_rejected": -167.01512145996094, "logps/rejected": -166.9549560546875, "loss": 0.6934, "rewards/accuracies": 0.4609375, "rewards/chosen": 2.849580778274685e-05, "rewards/margins": -0.0005732894060201943, "rewards/rejected": 0.0006017851992510259, "step": 9 }, { "epoch": 0.9221902017291066, "grad_norm": 0.32978877425193787, "learning_rate": 0.0, "logits/chosen": 0.1598304808139801, "logits/rejected": 0.3594120740890503, "logps/chosen": -140.08253479003906, "logps/ref_chosen": -140.051513671875, "logps/ref_rejected": -173.16290283203125, "logps/rejected": -173.14581298828125, "loss": 0.6934, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0003102564951404929, "rewards/margins": -0.0004811614053323865, "rewards/rejected": 0.00017090495384763926, "step": 10 }, { "epoch": 0.9221902017291066, "step": 10, "total_flos": 0.0, "train_loss": 0.6936326384544372, "train_runtime": 616.2156, "train_samples_per_second": 2.251, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }