|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9221902017291066,
  "eval_steps": 500,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09221902017291066,
      "grad_norm": 0.3019522428512573,
      "learning_rate": 5e-07,
      "logits/chosen": 0.06190446391701698,
      "logits/rejected": 0.24964340031147003,
      "logps/chosen": -133.9302215576172,
      "logps/ref_chosen": -134.30517578125,
      "logps/ref_rejected": -173.56590270996094,
      "logps/rejected": -173.0438232421875,
      "loss": 0.6939,
      "rewards/accuracies": 0.4453125,
      "rewards/chosen": 0.0037494890857487917,
      "rewards/margins": -0.0014713926939293742,
      "rewards/rejected": 0.005220881663262844,
      "step": 1
    },
    {
      "epoch": 0.1844380403458213,
      "grad_norm": 0.41573917865753174,
      "learning_rate": 4.849231551964771e-07,
      "logits/chosen": 0.1268736720085144,
      "logits/rejected": 0.2718392312526703,
      "logps/chosen": -138.44927978515625,
      "logps/ref_chosen": -138.6737518310547,
      "logps/ref_rejected": -170.81944274902344,
      "logps/rejected": -170.447265625,
      "loss": 0.6939,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": 0.0022446608636528254,
      "rewards/margins": -0.0014773242874071002,
      "rewards/rejected": 0.0037219852674752474,
      "step": 2
    },
    {
      "epoch": 0.276657060518732,
      "grad_norm": 0.3277066946029663,
      "learning_rate": 4.415111107797445e-07,
      "logits/chosen": 0.1311892718076706,
      "logits/rejected": 0.26672443747520447,
      "logps/chosen": -132.68569946289062,
      "logps/ref_chosen": -132.93014526367188,
      "logps/ref_rejected": -165.84507751464844,
      "logps/rejected": -165.56008911132812,
      "loss": 0.6934,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": 0.0024443636648356915,
      "rewards/margins": -0.00040555946179665625,
      "rewards/rejected": 0.0028499234467744827,
      "step": 3
    },
    {
      "epoch": 0.3688760806916426,
      "grad_norm": 0.41292324662208557,
      "learning_rate": 3.75e-07,
      "logits/chosen": 0.08853106200695038,
      "logits/rejected": 0.23616701364517212,
      "logps/chosen": -137.2488250732422,
      "logps/ref_chosen": -137.4427032470703,
      "logps/ref_rejected": -177.97886657714844,
      "logps/rejected": -177.62860107421875,
      "loss": 0.6939,
      "rewards/accuracies": 0.421875,
      "rewards/chosen": 0.001938714412972331,
      "rewards/margins": -0.0015639358898624778,
      "rewards/rejected": 0.003502650186419487,
      "step": 4
    },
    {
      "epoch": 0.4610951008645533,
      "grad_norm": 0.31257063150405884,
      "learning_rate": 2.934120444167326e-07,
      "logits/chosen": 0.12685821950435638,
      "logits/rejected": 0.23144984245300293,
      "logps/chosen": -131.0224609375,
      "logps/ref_chosen": -131.1569061279297,
      "logps/ref_rejected": -164.11549377441406,
      "logps/rejected": -163.87142944335938,
      "loss": 0.6937,
      "rewards/accuracies": 0.4453125,
      "rewards/chosen": 0.0013444966170936823,
      "rewards/margins": -0.001095889019779861,
      "rewards/rejected": 0.0024403855204582214,
      "step": 5
    },
    {
      "epoch": 0.553314121037464,
      "grad_norm": 0.36982661485671997,
      "learning_rate": 2.065879555832674e-07,
      "logits/chosen": 0.16632890701293945,
      "logits/rejected": 0.27399736642837524,
      "logps/chosen": -130.6382598876953,
      "logps/ref_chosen": -130.83815002441406,
      "logps/ref_rejected": -160.47244262695312,
      "logps/rejected": -160.15945434570312,
      "loss": 0.6937,
      "rewards/accuracies": 0.3671875,
      "rewards/chosen": 0.001998710911720991,
      "rewards/margins": -0.0011310731060802937,
      "rewards/rejected": 0.003129784483462572,
      "step": 6
    },
    {
      "epoch": 0.6455331412103746,
      "grad_norm": 0.27344921231269836,
      "learning_rate": 1.2500000000000005e-07,
      "logits/chosen": 0.11610936373472214,
      "logits/rejected": 0.24762782454490662,
      "logps/chosen": -131.18833923339844,
      "logps/ref_chosen": -131.2764892578125,
      "logps/ref_rejected": -175.32669067382812,
      "logps/rejected": -175.1273193359375,
      "loss": 0.6937,
      "rewards/accuracies": 0.4296875,
      "rewards/chosen": 0.0008815132896415889,
      "rewards/margins": -0.0011121523566544056,
      "rewards/rejected": 0.0019936657045036554,
      "step": 7
    },
    {
      "epoch": 0.7377521613832853,
      "grad_norm": 0.3876575529575348,
      "learning_rate": 5.848888922025552e-08,
      "logits/chosen": 0.0759858638048172,
      "logits/rejected": 0.2628093659877777,
      "logps/chosen": -131.0194549560547,
      "logps/ref_chosen": -131.0524139404297,
      "logps/ref_rejected": -162.96224975585938,
      "logps/rejected": -162.89967346191406,
      "loss": 0.6933,
      "rewards/accuracies": 0.484375,
      "rewards/chosen": 0.00032957797520793974,
      "rewards/margins": -0.00029635371174663305,
      "rewards/rejected": 0.0006259315996430814,
      "step": 8
    },
    {
      "epoch": 0.829971181556196,
      "grad_norm": 0.3749904930591583,
      "learning_rate": 1.507684480352292e-08,
      "logits/chosen": 0.12389053404331207,
      "logits/rejected": 0.27916383743286133,
      "logps/chosen": -136.4560546875,
      "logps/ref_chosen": -136.45892333984375,
      "logps/ref_rejected": -167.01512145996094,
      "logps/rejected": -166.9549560546875,
      "loss": 0.6934,
      "rewards/accuracies": 0.4609375,
      "rewards/chosen": 2.849580778274685e-05,
      "rewards/margins": -0.0005732894060201943,
      "rewards/rejected": 0.0006017851992510259,
      "step": 9
    },
    {
      "epoch": 0.9221902017291066,
      "grad_norm": 0.32978877425193787,
      "learning_rate": 0.0,
      "logits/chosen": 0.1598304808139801,
      "logits/rejected": 0.3594120740890503,
      "logps/chosen": -140.08253479003906,
      "logps/ref_chosen": -140.051513671875,
      "logps/ref_rejected": -173.16290283203125,
      "logps/rejected": -173.14581298828125,
      "loss": 0.6934,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.0003102564951404929,
      "rewards/margins": -0.0004811614053323865,
      "rewards/rejected": 0.00017090495384763926,
      "step": 10
    },
    {
      "epoch": 0.9221902017291066,
      "step": 10,
      "total_flos": 0.0,
      "train_loss": 0.6936326384544372,
      "train_runtime": 616.2156,
      "train_samples_per_second": 2.251,
      "train_steps_per_second": 0.016
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|